From: Chris Mason Date: Wed, 20 Feb 2013 19:06:05 +0000 (-0500) Subject: Merge branch 'raid56-experimental' into for-linus-3.9 X-Git-Tag: v3.9-rc1~4^2~29 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e942f883bc6651d50be139477baf6fb0eed3d5bb;p=profile%2Fivi%2Fkernel-x86-ivi.git Merge branch 'raid56-experimental' into for-linus-3.9 Signed-off-by: Chris Mason Conflicts: fs/btrfs/ctree.h fs/btrfs/extent-tree.c fs/btrfs/inode.c fs/btrfs/volumes.c --- e942f883bc6651d50be139477baf6fb0eed3d5bb diff --cc fs/btrfs/ctree.h index 1679051,e3a4fd7..3dcedfe --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@@ -956,16 -954,10 +958,20 @@@ struct btrfs_dev_replace_item #define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4) #define BTRFS_BLOCK_GROUP_DUP (1ULL << 5) #define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6) + #define BTRFS_BLOCK_GROUP_RAID5 (1 << 7) + #define BTRFS_BLOCK_GROUP_RAID6 (1 << 8) #define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE -#define BTRFS_NR_RAID_TYPES 7 + +enum btrfs_raid_types { + BTRFS_RAID_RAID10, + BTRFS_RAID_RAID1, + BTRFS_RAID_DUP, + BTRFS_RAID_RAID0, + BTRFS_RAID_SINGLE, ++ BTRFS_RAID_RAID5, ++ BTRFS_RAID_RAID6, + BTRFS_NR_RAID_TYPES +}; #define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \ BTRFS_BLOCK_GROUP_SYSTEM | \ @@@ -1237,11 -1235,23 +1249,28 @@@ struct seq_list u64 seq; }; +enum btrfs_orphan_cleanup_state { + ORPHAN_CLEANUP_STARTED = 1, + ORPHAN_CLEANUP_DONE = 2, +}; + + /* used by the raid56 code to lock stripes for read/modify/write */ + struct btrfs_stripe_hash { + struct list_head hash_list; + wait_queue_head_t wait; + spinlock_t lock; + }; + + /* used by the raid56 code to lock stripes for read/modify/write */ + struct btrfs_stripe_hash_table { + struct list_head stripe_cache; + spinlock_t cache_lock; + int cache_size; + struct btrfs_stripe_hash table[]; + }; + + #define BTRFS_STRIPE_HASH_TABLE_BITS 11 + /* fs_info */ struct reloc_control; struct btrfs_device; diff --cc fs/btrfs/extent-tree.c index 5cd44e2,7e801ad..b3ecca4 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@@ -2505,9 -2547,9 +2552,10 @@@ again ret = run_clustered_refs(trans, root, &cluster); if (ret < 0) { + btrfs_release_ref_cluster(&cluster); spin_unlock(&delayed_refs->lock); btrfs_abort_transaction(trans, root, ret); + atomic_dec(&delayed_refs->procs_running_refs); return ret; } @@@ -5591,16 -5577,23 +5659,20 @@@ wait_block_group_cache_done(struct btrf int __get_raid_index(u64 flags) { - int index; - if (flags & BTRFS_BLOCK_GROUP_RAID10) - index = 0; + return BTRFS_RAID_RAID10; else if (flags & BTRFS_BLOCK_GROUP_RAID1) - index = 1; + return BTRFS_RAID_RAID1; else if (flags & BTRFS_BLOCK_GROUP_DUP) - index = 2; + return BTRFS_RAID_DUP; else if (flags & BTRFS_BLOCK_GROUP_RAID0) - index = 3; + return BTRFS_RAID_RAID0; - else - return BTRFS_RAID_SINGLE; + else if (flags & BTRFS_BLOCK_GROUP_RAID5) - index = 5; ++ return BTRFS_RAID_RAID5; + else if (flags & BTRFS_BLOCK_GROUP_RAID6) - index = 6; - else - index = 4; /* BTRFS_BLOCK_GROUP_SINGLE */ - return index; ++ return BTRFS_RAID_RAID6; ++ ++ return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */ } static int get_block_group_index(struct btrfs_block_group_cache *cache) diff --cc fs/btrfs/extent_io.h index ff18232,b14b36a..dc81868 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@@ -72,9 -72,10 +72,9 @@@ struct extent_io_ops int (*writepage_start_hook)(struct page *page, u64 start, u64 end); int (*writepage_io_hook)(struct page *page, u64 start, u64 end); extent_submit_bio_hook_t *submit_bio_hook; - int (*merge_bio_hook)(struct page *page, unsigned long offset, + int (*merge_bio_hook)(int rw, struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags); - int (*readpage_io_hook)(struct page *page, u64 start, u64 end); int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state, int mirror); diff --cc fs/btrfs/inode.c index 1aa98be,492ee0e..4e6a11c --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@@ -39,7 -39,7 +39,8 @@@ #include #include #include +#include + #include #include "compat.h" #include "ctree.h" #include "disk-io.h" diff --cc fs/btrfs/transaction.c index 955204c,c56b9d4..a83d486 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@@ -167,9 -156,11 +167,12 @@@ loop spin_lock_init(&cur_trans->commit_lock); spin_lock_init(&cur_trans->delayed_refs.lock); + atomic_set(&cur_trans->delayed_refs.procs_running_refs, 0); + atomic_set(&cur_trans->delayed_refs.ref_seq, 0); + init_waitqueue_head(&cur_trans->delayed_refs.wait); INIT_LIST_HEAD(&cur_trans->pending_snapshots); + INIT_LIST_HEAD(&cur_trans->ordered_operations); list_add_tail(&cur_trans->list, &fs_info->trans_list); extent_io_tree_init(&cur_trans->dirty_pages, fs_info->btree_inode->i_mapping); diff --cc fs/btrfs/volumes.c index 72b1cf1,8818dc3..7992dc4 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@@ -3130,27 -3092,26 +3154,29 @@@ int btrfs_balance(struct btrfs_balance_ /* allow to reduce meta or sys integrity only if force set */ allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_RAID10; + BTRFS_BLOCK_GROUP_RAID10 | + BTRFS_BLOCK_GROUP_RAID5 | + BTRFS_BLOCK_GROUP_RAID6; - - if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) && - (fs_info->avail_system_alloc_bits & allowed) && - !(bctl->sys.target & allowed)) || - ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) && - (fs_info->avail_metadata_alloc_bits & allowed) && - !(bctl->meta.target & allowed))) { - if (bctl->flags & BTRFS_BALANCE_FORCE) { - printk(KERN_INFO "btrfs: force reducing metadata " - "integrity\n"); - } else { - printk(KERN_ERR "btrfs: balance will reduce metadata " - "integrity, use force if you want this\n"); - ret = -EINVAL; - goto out; + do { + seq = read_seqbegin(&fs_info->profiles_lock); + + if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) && + (fs_info->avail_system_alloc_bits & allowed) && + !(bctl->sys.target & allowed)) || + ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) && + (fs_info->avail_metadata_alloc_bits & allowed) && + !(bctl->meta.target & allowed))) { + if (bctl->flags & BTRFS_BALANCE_FORCE) { + printk(KERN_INFO "btrfs: force reducing metadata " + "integrity\n"); + } else { + printk(KERN_ERR "btrfs: balance will reduce metadata " + "integrity, use force if you want this\n"); + ret = -EINVAL; + goto out; + } } - } + } while (read_seqretry(&fs_info->profiles_lock, seq)); if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { int num_tolerated_disk_barrier_failures; @@@ -3199,16 -3169,9 +3225,11 @@@ btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); } - if (cancel) - __cancel_balance(fs_info); + if (bargs) { + memset(bargs, 0, sizeof(*bargs)); + update_ioctl_balance_args(fs_info, 0, bargs); + } - if ((ret && ret != -ECANCELED && ret != -ENOSPC) || - balance_need_close(fs_info)) { - __cancel_balance(fs_info); - } - wake_up(&fs_info->balance_wait_q); return ret; @@@ -3571,48 -3534,45 +3592,86 @@@ static int btrfs_cmp_device_info(const } struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { - /* - * sub_stripes info for map, - * dev_stripes -- stripes per dev, 2 for DUP, 1 other wise - * devs_max -- max devices per stripe, 0 for unlimited - * devs_min -- min devices per stripe - * devs_increment -- ndevs must be a multiple of this - * ncopies -- how many copies of the data we have - */ - { 2, 1, 0, 4, 2, 2 /* raid10 */ }, - { 1, 1, 2, 2, 2, 2 /* raid1 */ }, - { 1, 2, 1, 1, 1, 2 /* dup */ }, - { 1, 1, 0, 2, 1, 1 /* raid0 */ }, - { 1, 1, 1, 1, 1, 1 /* single */ }, - { 1, 1, 0, 2, 1, 2 /* raid5 */ }, - { 1, 1, 0, 3, 1, 3 /* raid6 */ }, + [BTRFS_RAID_RAID10] = { + .sub_stripes = 2, + .dev_stripes = 1, + .devs_max = 0, /* 0 == as many as possible */ + .devs_min = 4, + .devs_increment = 2, + .ncopies = 2, + }, + [BTRFS_RAID_RAID1] = { + .sub_stripes = 1, + .dev_stripes = 1, + .devs_max = 2, + .devs_min = 2, + .devs_increment = 2, + .ncopies = 2, + }, + [BTRFS_RAID_DUP] = { + .sub_stripes = 1, + .dev_stripes = 2, + .devs_max = 1, + .devs_min = 1, + .devs_increment = 1, + .ncopies = 2, + }, + [BTRFS_RAID_RAID0] = { + .sub_stripes = 1, + .dev_stripes = 1, + .devs_max = 0, + .devs_min = 2, + .devs_increment = 1, + .ncopies = 1, + }, + [BTRFS_RAID_SINGLE] = { + .sub_stripes = 1, + .dev_stripes = 1, + .devs_max = 1, + .devs_min = 1, + .devs_increment = 1, + .ncopies = 1, + }, ++ [BTRFS_RAID_RAID5] = { ++ .sub_stripes = 1, ++ .dev_stripes = 1, ++ .devs_max = 0, ++ .devs_min = 2, ++ .devs_increment = 1, ++ .ncopies = 2, ++ }, ++ [BTRFS_RAID_RAID6] = { ++ .sub_stripes = 1, ++ .dev_stripes = 1, ++ .devs_max = 0, ++ .devs_min = 3, ++ .devs_increment = 1, ++ .ncopies = 3, ++ }, }; - + + static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target) + { + /* TODO allow them to set a preferred stripe size */ + return 64 * 1024; + } + + static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) + { + u64 features; + + if (!(type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6))) + return; + + features = btrfs_super_incompat_flags(info->super_copy); + if (features & BTRFS_FEATURE_INCOMPAT_RAID56) + return; + + features |= BTRFS_FEATURE_INCOMPAT_RAID56; + btrfs_set_super_incompat_flags(info->super_copy, features); + printk(KERN_INFO "btrfs: setting RAID5/6 feature flag\n"); + } + static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, struct map_lookup **map_ret, @@@ -3840,19 -3819,14 +3917,21 @@@ info->chunk_root->root_key.objectid, BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, dev_offset, stripe_size); - if (ret) { - btrfs_abort_transaction(trans, extent_root, ret); - goto error; - } + if (ret) + goto error_dev_extent; } + ret = btrfs_make_block_group(trans, extent_root, 0, type, + BTRFS_FIRST_CHUNK_TREE_OBJECTID, + start, num_bytes); + if (ret) { + i = map->num_stripes - 1; + goto error_dev_extent; + } + + free_extent_map(em); + check_raid56_incompat_flag(extent_root->fs_info, type); + kfree(devices_info); return 0;