X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=fs%2Fbtrfs%2Fvolumes.c;h=e0b7bb92a170c3ad529fca30ed9f6539646dbbe5;hb=d756bd2d9339447c29bde950910586df8f8941ec;hp=fbb493b28d5a9c817cb75c3feec421579e74c2e0;hpb=b367e47fb3a70f5d24ebd6faf7d42436d485fb2d;p=platform%2Fadaptation%2Frenesas_rcar%2Frenesas_kernel.git diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index fbb493b..e0b7bb9 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "compat.h" #include "ctree.h" @@ -1280,7 +1281,6 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) bool clear_super = false; mutex_lock(&uuid_mutex); - mutex_lock(&root->fs_info->volume_mutex); all_avail = root->fs_info->avail_data_alloc_bits | root->fs_info->avail_system_alloc_bits | @@ -1450,7 +1450,6 @@ error_close: if (bdev) blkdev_put(bdev, FMODE_READ | FMODE_EXCL); out: - mutex_unlock(&root->fs_info->volume_mutex); mutex_unlock(&uuid_mutex); return ret; error_undo: @@ -1626,7 +1625,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) } filemap_write_and_wait(bdev->bd_inode->i_mapping); - mutex_lock(&root->fs_info->volume_mutex); devices = &root->fs_info->fs_devices->devices; /* @@ -1754,8 +1752,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) ret = btrfs_relocate_sys_chunks(root); BUG_ON(ret); } -out: - mutex_unlock(&root->fs_info->volume_mutex); + return ret; error: blkdev_put(bdev, FMODE_EXCL); @@ -1763,7 +1760,7 @@ error: mutex_unlock(&uuid_mutex); up_write(&sb->s_umount); } - goto out; + return ret; } static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, @@ -2074,6 +2071,362 @@ error: return ret; } +static int insert_balance_item(struct btrfs_root *root, + struct btrfs_balance_control *bctl) +{ + struct btrfs_trans_handle *trans; + struct btrfs_balance_item *item; + struct btrfs_disk_balance_args disk_bargs; + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_key key; + int ret, err; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + btrfs_free_path(path); + return PTR_ERR(trans); + } + + key.objectid = BTRFS_BALANCE_OBJECTID; + key.type = BTRFS_BALANCE_ITEM_KEY; + key.offset = 0; + + ret = btrfs_insert_empty_item(trans, root, path, &key, + sizeof(*item)); + if (ret) + goto out; + + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item); + + memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); + + btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->data); + btrfs_set_balance_data(leaf, item, &disk_bargs); + btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->meta); + btrfs_set_balance_meta(leaf, item, &disk_bargs); + btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->sys); + btrfs_set_balance_sys(leaf, item, &disk_bargs); + + btrfs_set_balance_flags(leaf, item, bctl->flags); + + btrfs_mark_buffer_dirty(leaf); +out: + btrfs_free_path(path); + err = btrfs_commit_transaction(trans, root); + if (err && !ret) + ret = err; + return ret; +} + +static int del_balance_item(struct btrfs_root *root) +{ + struct btrfs_trans_handle *trans; + struct btrfs_path *path; + struct btrfs_key key; + int ret, err; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + btrfs_free_path(path); + return PTR_ERR(trans); + } + + key.objectid = BTRFS_BALANCE_OBJECTID; + key.type = BTRFS_BALANCE_ITEM_KEY; + key.offset = 0; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) + goto out; + if (ret > 0) { + ret = -ENOENT; + goto out; + } + + ret = btrfs_del_item(trans, root, path); +out: + btrfs_free_path(path); + err = btrfs_commit_transaction(trans, root); + if (err && !ret) + ret = err; + return ret; +} + +/* + * This is a heuristic used to reduce the number of chunks balanced on + * resume after balance was interrupted. + */ +static void update_balance_args(struct btrfs_balance_control *bctl) +{ + /* + * Turn on soft mode for chunk types that were being converted. + */ + if (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) + bctl->data.flags |= BTRFS_BALANCE_ARGS_SOFT; + if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) + bctl->sys.flags |= BTRFS_BALANCE_ARGS_SOFT; + if (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) + bctl->meta.flags |= BTRFS_BALANCE_ARGS_SOFT; + + /* + * Turn on usage filter if is not already used. The idea is + * that chunks that we have already balanced should be + * reasonably full. Don't do it for chunks that are being + * converted - that will keep us from relocating unconverted + * (albeit full) chunks. + */ + if (!(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE) && + !(bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)) { + bctl->data.flags |= BTRFS_BALANCE_ARGS_USAGE; + bctl->data.usage = 90; + } + if (!(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE) && + !(bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)) { + bctl->sys.flags |= BTRFS_BALANCE_ARGS_USAGE; + bctl->sys.usage = 90; + } + if (!(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE) && + !(bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)) { + bctl->meta.flags |= BTRFS_BALANCE_ARGS_USAGE; + bctl->meta.usage = 90; + } +} + +/* + * Should be called with both balance and volume mutexes held to + * serialize other volume operations (add_dev/rm_dev/resize) with + * restriper. Same goes for unset_balance_control. + */ +static void set_balance_control(struct btrfs_balance_control *bctl) +{ + struct btrfs_fs_info *fs_info = bctl->fs_info; + + BUG_ON(fs_info->balance_ctl); + + spin_lock(&fs_info->balance_lock); + fs_info->balance_ctl = bctl; + spin_unlock(&fs_info->balance_lock); +} + +static void unset_balance_control(struct btrfs_fs_info *fs_info) +{ + struct btrfs_balance_control *bctl = fs_info->balance_ctl; + + BUG_ON(!fs_info->balance_ctl); + + spin_lock(&fs_info->balance_lock); + fs_info->balance_ctl = NULL; + spin_unlock(&fs_info->balance_lock); + + kfree(bctl); +} + +/* + * Balance filters. Return 1 if chunk should be filtered out + * (should not be balanced). + */ +static int chunk_profiles_filter(u64 chunk_profile, + struct btrfs_balance_args *bargs) +{ + chunk_profile &= BTRFS_BLOCK_GROUP_PROFILE_MASK; + + if (chunk_profile == 0) + chunk_profile = BTRFS_AVAIL_ALLOC_BIT_SINGLE; + + if (bargs->profiles & chunk_profile) + return 0; + + return 1; +} + +static u64 div_factor_fine(u64 num, int factor) +{ + if (factor <= 0) + return 0; + if (factor >= 100) + return num; + + num *= factor; + do_div(num, 100); + return num; +} + +static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, + struct btrfs_balance_args *bargs) +{ + struct btrfs_block_group_cache *cache; + u64 chunk_used, user_thresh; + int ret = 1; + + cache = btrfs_lookup_block_group(fs_info, chunk_offset); + chunk_used = btrfs_block_group_used(&cache->item); + + user_thresh = div_factor_fine(cache->key.offset, bargs->usage); + if (chunk_used < user_thresh) + ret = 0; + + btrfs_put_block_group(cache); + return ret; +} + +static int chunk_devid_filter(struct extent_buffer *leaf, + struct btrfs_chunk *chunk, + struct btrfs_balance_args *bargs) +{ + struct btrfs_stripe *stripe; + int num_stripes = btrfs_chunk_num_stripes(leaf, chunk); + int i; + + for (i = 0; i < num_stripes; i++) { + stripe = btrfs_stripe_nr(chunk, i); + if (btrfs_stripe_devid(leaf, stripe) == bargs->devid) + return 0; + } + + return 1; +} + +/* [pstart, pend) */ +static int chunk_drange_filter(struct extent_buffer *leaf, + struct btrfs_chunk *chunk, + u64 chunk_offset, + struct btrfs_balance_args *bargs) +{ + struct btrfs_stripe *stripe; + int num_stripes = btrfs_chunk_num_stripes(leaf, chunk); + u64 stripe_offset; + u64 stripe_length; + int factor; + int i; + + if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID)) + return 0; + + if (btrfs_chunk_type(leaf, chunk) & (BTRFS_BLOCK_GROUP_DUP | + BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10)) + factor = 2; + else + factor = 1; + factor = num_stripes / factor; + + for (i = 0; i < num_stripes; i++) { + stripe = btrfs_stripe_nr(chunk, i); + if (btrfs_stripe_devid(leaf, stripe) != bargs->devid) + continue; + + stripe_offset = btrfs_stripe_offset(leaf, stripe); + stripe_length = btrfs_chunk_length(leaf, chunk); + do_div(stripe_length, factor); + + if (stripe_offset < bargs->pend && + stripe_offset + stripe_length > bargs->pstart) + return 0; + } + + return 1; +} + +/* [vstart, vend) */ +static int chunk_vrange_filter(struct extent_buffer *leaf, + struct btrfs_chunk *chunk, + u64 chunk_offset, + struct btrfs_balance_args *bargs) +{ + if (chunk_offset < bargs->vend && + chunk_offset + btrfs_chunk_length(leaf, chunk) > bargs->vstart) + /* at least part of the chunk is inside this vrange */ + return 0; + + return 1; +} + +static int chunk_soft_convert_filter(u64 chunk_profile, + struct btrfs_balance_args *bargs) +{ + if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT)) + return 0; + + chunk_profile &= BTRFS_BLOCK_GROUP_PROFILE_MASK; + + if (chunk_profile == 0) + chunk_profile = BTRFS_AVAIL_ALLOC_BIT_SINGLE; + + if (bargs->target & chunk_profile) + return 1; + + return 0; +} + +static int should_balance_chunk(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_chunk *chunk, u64 chunk_offset) +{ + struct btrfs_balance_control *bctl = root->fs_info->balance_ctl; + struct btrfs_balance_args *bargs = NULL; + u64 chunk_type = btrfs_chunk_type(leaf, chunk); + + /* type filter */ + if (!((chunk_type & BTRFS_BLOCK_GROUP_TYPE_MASK) & + (bctl->flags & BTRFS_BALANCE_TYPE_MASK))) { + return 0; + } + + if (chunk_type & BTRFS_BLOCK_GROUP_DATA) + bargs = &bctl->data; + else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) + bargs = &bctl->sys; + else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA) + bargs = &bctl->meta; + + /* profiles filter */ + if ((bargs->flags & BTRFS_BALANCE_ARGS_PROFILES) && + chunk_profiles_filter(chunk_type, bargs)) { + return 0; + } + + /* usage filter */ + if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE) && + chunk_usage_filter(bctl->fs_info, chunk_offset, bargs)) { + return 0; + } + + /* devid filter */ + if ((bargs->flags & BTRFS_BALANCE_ARGS_DEVID) && + chunk_devid_filter(leaf, chunk, bargs)) { + return 0; + } + + /* drange filter, makes sense only with devid filter */ + if ((bargs->flags & BTRFS_BALANCE_ARGS_DRANGE) && + chunk_drange_filter(leaf, chunk, chunk_offset, bargs)) { + return 0; + } + + /* vrange filter */ + if ((bargs->flags & BTRFS_BALANCE_ARGS_VRANGE) && + chunk_vrange_filter(leaf, chunk, chunk_offset, bargs)) { + return 0; + } + + /* soft profile changing mode */ + if ((bargs->flags & BTRFS_BALANCE_ARGS_SOFT) && + chunk_soft_convert_filter(chunk_type, bargs)) { + return 0; + } + + return 1; +} + static u64 div_factor(u64 num, int factor) { if (factor == 10) @@ -2083,29 +2436,28 @@ static u64 div_factor(u64 num, int factor) return num; } -int btrfs_balance(struct btrfs_root *dev_root) +static int __btrfs_balance(struct btrfs_fs_info *fs_info) { - int ret; - struct list_head *devices = &dev_root->fs_info->fs_devices->devices; + struct btrfs_balance_control *bctl = fs_info->balance_ctl; + struct btrfs_root *chunk_root = fs_info->chunk_root; + struct btrfs_root *dev_root = fs_info->dev_root; + struct list_head *devices; struct btrfs_device *device; u64 old_size; u64 size_to_free; + struct btrfs_chunk *chunk; struct btrfs_path *path; struct btrfs_key key; - struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root; - struct btrfs_trans_handle *trans; struct btrfs_key found_key; - - if (dev_root->fs_info->sb->s_flags & MS_RDONLY) - return -EROFS; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - mutex_lock(&dev_root->fs_info->volume_mutex); - dev_root = dev_root->fs_info->dev_root; + struct btrfs_trans_handle *trans; + struct extent_buffer *leaf; + int slot; + int ret; + int enospc_errors = 0; + bool counting = true; /* step one make some room on all the devices */ + devices = &fs_info->fs_devices->devices; list_for_each_entry(device, devices, dev_list) { old_size = device->total_bytes; size_to_free = div_factor(old_size, 1); @@ -2134,11 +2486,23 @@ int btrfs_balance(struct btrfs_root *dev_root) ret = -ENOMEM; goto error; } + + /* zero out stat counters */ + spin_lock(&fs_info->balance_lock); + memset(&bctl->stat, 0, sizeof(bctl->stat)); + spin_unlock(&fs_info->balance_lock); +again: key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; key.offset = (u64)-1; key.type = BTRFS_CHUNK_ITEM_KEY; while (1) { + if ((!counting && atomic_read(&fs_info->balance_pause_req)) || + atomic_read(&fs_info->balance_cancel_req)) { + ret = -ECANCELED; + goto error; + } + ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); if (ret < 0) goto error; @@ -2148,15 +2512,19 @@ int btrfs_balance(struct btrfs_root *dev_root) * failed */ if (ret == 0) - break; + BUG(); /* FIXME break ? */ ret = btrfs_previous_item(chunk_root, path, 0, BTRFS_CHUNK_ITEM_KEY); - if (ret) + if (ret) { + ret = 0; break; + } + + leaf = path->nodes[0]; + slot = path->slots[0]; + btrfs_item_key_to_cpu(leaf, &found_key, slot); - btrfs_item_key_to_cpu(path->nodes[0], &found_key, - path->slots[0]); if (found_key.objectid != key.objectid) break; @@ -2164,22 +2532,375 @@ int btrfs_balance(struct btrfs_root *dev_root) if (found_key.offset == 0) break; + chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); + + if (!counting) { + spin_lock(&fs_info->balance_lock); + bctl->stat.considered++; + spin_unlock(&fs_info->balance_lock); + } + + ret = should_balance_chunk(chunk_root, leaf, chunk, + found_key.offset); btrfs_release_path(path); + if (!ret) + goto loop; + + if (counting) { + spin_lock(&fs_info->balance_lock); + bctl->stat.expected++; + spin_unlock(&fs_info->balance_lock); + goto loop; + } + ret = btrfs_relocate_chunk(chunk_root, chunk_root->root_key.objectid, found_key.objectid, found_key.offset); if (ret && ret != -ENOSPC) goto error; + if (ret == -ENOSPC) { + enospc_errors++; + } else { + spin_lock(&fs_info->balance_lock); + bctl->stat.completed++; + spin_unlock(&fs_info->balance_lock); + } +loop: key.offset = found_key.offset - 1; } - ret = 0; + + if (counting) { + btrfs_release_path(path); + counting = false; + goto again; + } error: btrfs_free_path(path); - mutex_unlock(&dev_root->fs_info->volume_mutex); + if (enospc_errors) { + printk(KERN_INFO "btrfs: %d enospc errors during balance\n", + enospc_errors); + if (!ret) + ret = -ENOSPC; + } + return ret; } +static inline int balance_need_close(struct btrfs_fs_info *fs_info) +{ + /* cancel requested || normal exit path */ + return atomic_read(&fs_info->balance_cancel_req) || + (atomic_read(&fs_info->balance_pause_req) == 0 && + atomic_read(&fs_info->balance_cancel_req) == 0); +} + +static void __cancel_balance(struct btrfs_fs_info *fs_info) +{ + int ret; + + unset_balance_control(fs_info); + ret = del_balance_item(fs_info->tree_root); + BUG_ON(ret); +} + +void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, + struct btrfs_ioctl_balance_args *bargs); + +/* + * Should be called with both balance and volume mutexes held + */ +int btrfs_balance(struct btrfs_balance_control *bctl, + struct btrfs_ioctl_balance_args *bargs) +{ + struct btrfs_fs_info *fs_info = bctl->fs_info; + u64 allowed; + int ret; + + if (btrfs_fs_closing(fs_info) || + atomic_read(&fs_info->balance_pause_req) || + atomic_read(&fs_info->balance_cancel_req)) { + ret = -EINVAL; + goto out; + } + + /* + * In case of mixed groups both data and meta should be picked, + * and identical options should be given for both of them. + */ + allowed = btrfs_super_incompat_flags(fs_info->super_copy); + if ((allowed & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && + (bctl->flags & (BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA))) { + if (!(bctl->flags & BTRFS_BALANCE_DATA) || + !(bctl->flags & BTRFS_BALANCE_METADATA) || + memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) { + printk(KERN_ERR "btrfs: with mixed groups data and " + "metadata balance options must be the same\n"); + ret = -EINVAL; + goto out; + } + } + + /* + * Profile changing sanity checks. Skip them if a simple + * balance is requested. + */ + if (!((bctl->data.flags | bctl->sys.flags | bctl->meta.flags) & + BTRFS_BALANCE_ARGS_CONVERT)) + goto do_balance; + + allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; + if (fs_info->fs_devices->num_devices == 1) + allowed |= BTRFS_BLOCK_GROUP_DUP; + else if (fs_info->fs_devices->num_devices < 4) + allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1); + else + allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10); + + if (!profile_is_valid(bctl->data.target, 1) || + bctl->data.target & ~allowed) { + printk(KERN_ERR "btrfs: unable to start balance with target " + "data profile %llu\n", + (unsigned long long)bctl->data.target); + ret = -EINVAL; + goto out; + } + if (!profile_is_valid(bctl->meta.target, 1) || + bctl->meta.target & ~allowed) { + printk(KERN_ERR "btrfs: unable to start balance with target " + "metadata profile %llu\n", + (unsigned long long)bctl->meta.target); + ret = -EINVAL; + goto out; + } + if (!profile_is_valid(bctl->sys.target, 1) || + bctl->sys.target & ~allowed) { + printk(KERN_ERR "btrfs: unable to start balance with target " + "system profile %llu\n", + (unsigned long long)bctl->sys.target); + ret = -EINVAL; + goto out; + } + + if (bctl->data.target & BTRFS_BLOCK_GROUP_DUP) { + printk(KERN_ERR "btrfs: dup for data is not allowed\n"); + ret = -EINVAL; + goto out; + } + + /* allow to reduce meta or sys integrity only if force set */ + allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10; + if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) && + (fs_info->avail_system_alloc_bits & allowed) && + !(bctl->sys.target & allowed)) || + ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) && + (fs_info->avail_metadata_alloc_bits & allowed) && + !(bctl->meta.target & allowed))) { + if (bctl->flags & BTRFS_BALANCE_FORCE) { + printk(KERN_INFO "btrfs: force reducing metadata " + "integrity\n"); + } else { + printk(KERN_ERR "btrfs: balance will reduce metadata " + "integrity, use force if you want this\n"); + ret = -EINVAL; + goto out; + } + } + +do_balance: + ret = insert_balance_item(fs_info->tree_root, bctl); + if (ret && ret != -EEXIST) + goto out; + + if (!(bctl->flags & BTRFS_BALANCE_RESUME)) { + BUG_ON(ret == -EEXIST); + set_balance_control(bctl); + } else { + BUG_ON(ret != -EEXIST); + spin_lock(&fs_info->balance_lock); + update_balance_args(bctl); + spin_unlock(&fs_info->balance_lock); + } + + atomic_inc(&fs_info->balance_running); + mutex_unlock(&fs_info->balance_mutex); + + ret = __btrfs_balance(fs_info); + + mutex_lock(&fs_info->balance_mutex); + atomic_dec(&fs_info->balance_running); + + if (bargs) { + memset(bargs, 0, sizeof(*bargs)); + update_ioctl_balance_args(fs_info, 0, bargs); + } + + if ((ret && ret != -ECANCELED && ret != -ENOSPC) || + balance_need_close(fs_info)) { + __cancel_balance(fs_info); + } + + wake_up(&fs_info->balance_wait_q); + + return ret; +out: + if (bctl->flags & BTRFS_BALANCE_RESUME) + __cancel_balance(fs_info); + else + kfree(bctl); + return ret; +} + +static int balance_kthread(void *data) +{ + struct btrfs_balance_control *bctl = + (struct btrfs_balance_control *)data; + struct btrfs_fs_info *fs_info = bctl->fs_info; + int ret = 0; + + mutex_lock(&fs_info->volume_mutex); + mutex_lock(&fs_info->balance_mutex); + + set_balance_control(bctl); + + if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) { + printk(KERN_INFO "btrfs: force skipping balance\n"); + } else { + printk(KERN_INFO "btrfs: continuing balance\n"); + ret = btrfs_balance(bctl, NULL); + } + + mutex_unlock(&fs_info->balance_mutex); + mutex_unlock(&fs_info->volume_mutex); + return ret; +} + +int btrfs_recover_balance(struct btrfs_root *tree_root) +{ + struct task_struct *tsk; + struct btrfs_balance_control *bctl; + struct btrfs_balance_item *item; + struct btrfs_disk_balance_args disk_bargs; + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_key key; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + bctl = kzalloc(sizeof(*bctl), GFP_NOFS); + if (!bctl) { + ret = -ENOMEM; + goto out; + } + + key.objectid = BTRFS_BALANCE_OBJECTID; + key.type = BTRFS_BALANCE_ITEM_KEY; + key.offset = 0; + + ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); + if (ret < 0) + goto out_bctl; + if (ret > 0) { /* ret = -ENOENT; */ + ret = 0; + goto out_bctl; + } + + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item); + + bctl->fs_info = tree_root->fs_info; + bctl->flags = btrfs_balance_flags(leaf, item) | BTRFS_BALANCE_RESUME; + + btrfs_balance_data(leaf, item, &disk_bargs); + btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs); + btrfs_balance_meta(leaf, item, &disk_bargs); + btrfs_disk_balance_args_to_cpu(&bctl->meta, &disk_bargs); + btrfs_balance_sys(leaf, item, &disk_bargs); + btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs); + + tsk = kthread_run(balance_kthread, bctl, "btrfs-balance"); + if (IS_ERR(tsk)) + ret = PTR_ERR(tsk); + else + goto out; + +out_bctl: + kfree(bctl); +out: + btrfs_free_path(path); + return ret; +} + +int btrfs_pause_balance(struct btrfs_fs_info *fs_info) +{ + int ret = 0; + + mutex_lock(&fs_info->balance_mutex); + if (!fs_info->balance_ctl) { + mutex_unlock(&fs_info->balance_mutex); + return -ENOTCONN; + } + + if (atomic_read(&fs_info->balance_running)) { + atomic_inc(&fs_info->balance_pause_req); + mutex_unlock(&fs_info->balance_mutex); + + wait_event(fs_info->balance_wait_q, + atomic_read(&fs_info->balance_running) == 0); + + mutex_lock(&fs_info->balance_mutex); + /* we are good with balance_ctl ripped off from under us */ + BUG_ON(atomic_read(&fs_info->balance_running)); + atomic_dec(&fs_info->balance_pause_req); + } else { + ret = -ENOTCONN; + } + + mutex_unlock(&fs_info->balance_mutex); + return ret; +} + +int btrfs_cancel_balance(struct btrfs_fs_info *fs_info) +{ + mutex_lock(&fs_info->balance_mutex); + if (!fs_info->balance_ctl) { + mutex_unlock(&fs_info->balance_mutex); + return -ENOTCONN; + } + + atomic_inc(&fs_info->balance_cancel_req); + /* + * if we are running just wait and return, balance item is + * deleted in btrfs_balance in this case + */ + if (atomic_read(&fs_info->balance_running)) { + mutex_unlock(&fs_info->balance_mutex); + wait_event(fs_info->balance_wait_q, + atomic_read(&fs_info->balance_running) == 0); + mutex_lock(&fs_info->balance_mutex); + } else { + /* __cancel_balance needs volume_mutex */ + mutex_unlock(&fs_info->balance_mutex); + mutex_lock(&fs_info->volume_mutex); + mutex_lock(&fs_info->balance_mutex); + + if (fs_info->balance_ctl) + __cancel_balance(fs_info); + + mutex_unlock(&fs_info->volume_mutex); + } + + BUG_ON(fs_info->balance_ctl || atomic_read(&fs_info->balance_running)); + atomic_dec(&fs_info->balance_cancel_req); + mutex_unlock(&fs_info->balance_mutex); + return 0; +} + /* * shrinking a device means finding all of the device extents past * the new size, and then following the back refs to the chunks. @@ -2437,7 +3158,11 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, max_stripe_size = 1024 * 1024 * 1024; max_chunk_size = 10 * max_stripe_size; } else if (type & BTRFS_BLOCK_GROUP_METADATA) { - max_stripe_size = 256 * 1024 * 1024; + /* for larger filesystems, use larger metadata chunks */ + if (fs_devices->total_rw_bytes > 50ULL * 1024 * 1024 * 1024) + max_stripe_size = 1024 * 1024 * 1024; + else + max_stripe_size = 256 * 1024 * 1024; max_chunk_size = max_stripe_size; } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { max_stripe_size = 8 * 1024 * 1024; @@ -2748,8 +3473,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, return ret; alloc_profile = BTRFS_BLOCK_GROUP_METADATA | - (fs_info->metadata_alloc_profile & - fs_info->avail_metadata_alloc_bits); + fs_info->avail_metadata_alloc_bits; alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile); ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size, @@ -2759,8 +3483,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, sys_chunk_offset = chunk_offset + chunk_size; alloc_profile = BTRFS_BLOCK_GROUP_SYSTEM | - (fs_info->system_alloc_profile & - fs_info->avail_system_alloc_bits); + fs_info->avail_system_alloc_bits; alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile); ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map, @@ -2937,10 +3660,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, if (rw & REQ_DISCARD) *length = min_t(u64, em->len - offset, *length); - else if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | - BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_RAID10 | - BTRFS_BLOCK_GROUP_DUP)) { + else if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { /* we limit the length of each bio to what fits in a stripe */ *length = min_t(u64, em->len - offset, map->stripe_len - stripe_offset);