Add support for single-device duplication of metadata
author Chris Mason <chris.mason@oracle.com>
Thu, 3 Apr 2008 20:35:48 +0000 (16:35 -0400)
committer David Woodhouse <dwmw2@hera.kernel.org>
Thu, 3 Apr 2008 20:35:48 +0000 (16:35 -0400)
ctree.h
extent-tree.c
mkfs.c
volumes.c

diff --git a/ctree.h b/ctree.h
index b90a567..ccd9319 100644 (file)
--- a/ctree.h
+++ b/ctree.h
@@ -407,6 +407,7 @@ struct btrfs_csum_item {
 #define BTRFS_BLOCK_GROUP_METADATA (1 << 2)
 #define BTRFS_BLOCK_GROUP_RAID0    (1 << 3)
 #define BTRFS_BLOCK_GROUP_RAID1    (1 << 4)
+#define BTRFS_BLOCK_GROUP_DUP     (1 << 5)
 
 struct btrfs_block_group_item {
        __le64 used;
diff --git a/extent-tree.c b/extent-tree.c
index d845ada..ba44600 100644 (file)
--- a/extent-tree.c
+++ b/extent-tree.c
@@ -1618,30 +1618,21 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
        struct btrfs_extent_item *extent_item;
        struct btrfs_extent_ref *ref;
        struct btrfs_key keys[2];
-       int extra_alloc_flags = 0;
-
-       if (0 && btrfs_super_num_devices(&info->super_copy) > 1) {
-               if (data)
-                       extra_alloc_flags = BTRFS_BLOCK_GROUP_RAID0;
-               else
-                       extra_alloc_flags = BTRFS_BLOCK_GROUP_RAID1;
-       }
 
        if (data) {
-               data = BTRFS_BLOCK_GROUP_DATA | extra_alloc_flags;
+               data = BTRFS_BLOCK_GROUP_DATA;
        } else if (root == root->fs_info->chunk_root ||
                   info->force_system_allocs) {
                data = BTRFS_BLOCK_GROUP_SYSTEM;
        } else {
-               data = BTRFS_BLOCK_GROUP_METADATA | extra_alloc_flags;
+               data = BTRFS_BLOCK_GROUP_METADATA;
        }
 
        if (root->ref_cows) {
                if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
                        ret = do_chunk_alloc(trans, root->fs_info->extent_root,
                                             num_bytes,
-                                            BTRFS_BLOCK_GROUP_METADATA |
-                                            extra_alloc_flags);
+                                            BTRFS_BLOCK_GROUP_METADATA);
                        BUG_ON(ret);
                }
                ret = do_chunk_alloc(trans, root->fs_info->extent_root,
diff --git a/mkfs.c b/mkfs.c
index 0d2ca38..64426c3 100644 (file)
--- a/mkfs.c
+++ b/mkfs.c
@@ -139,6 +139,49 @@ err:
        return ret;
 }
 
+static int create_one_raid_group(struct btrfs_trans_handle *trans,
+                             struct btrfs_root *root, u64 type)
+{
+       u64 chunk_start;
+       u64 chunk_size;
+       int ret;
+
+       ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
+                               &chunk_start, &chunk_size, type);
+       BUG_ON(ret);
+       ret = btrfs_make_block_group(trans, root->fs_info->extent_root, 0,
+                                    type, BTRFS_CHUNK_TREE_OBJECTID,
+                                    chunk_start, chunk_size);
+       BUG_ON(ret);
+       return ret;
+}
+
+static int create_raid_groups(struct btrfs_trans_handle *trans,
+                             struct btrfs_root *root, u64 data_profile,
+                             u64 metadata_profile)
+{
+       u64 num_devices = btrfs_super_num_devices(&root->fs_info->super_copy);
+       u64 allowed;
+       int ret;
+
+       if (num_devices == 1)
+               allowed = BTRFS_BLOCK_GROUP_DUP;
+       else
+               allowed = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1;
+
+       ret = create_one_raid_group(trans, root,
+                                   BTRFS_BLOCK_GROUP_METADATA |
+                                   (allowed & metadata_profile));
+       BUG_ON(ret);
+       if (num_devices > 1) {
+               ret = create_one_raid_group(trans, root,
+                                           BTRFS_BLOCK_GROUP_DATA |
+                                           (allowed & data_profile));
+               BUG_ON(ret);
+       }
+       return 0;
+}
+
 static void print_usage(void)
 {
        fprintf(stderr, "usage: mkfs.btrfs [options] dev [ dev ... ]\n");
@@ -163,8 +206,6 @@ int main(int ac, char **av)
        char *file;
        u64 block_count = 0;
        u64 dev_block_count = 0;
-       u64 chunk_start;
-       u64 chunk_size;
        int fd;
        int first_fd;
        int ret;
@@ -255,17 +296,17 @@ int main(int ac, char **av)
               file, nodesize, leafsize, sectorsize,
               (unsigned long long)block_count);
 
+       root = open_ctree(file, 0);
+       trans = btrfs_start_transaction(root, 1);
+
        if (ac == 0)
-               goto done;
+               goto raid_groups;
 
        btrfs_register_one_device(file);
-       root = open_ctree(file, 0);
-
        if (!root) {
                fprintf(stderr, "ctree init failed\n");
                return -1;
        }
-       trans = btrfs_start_transaction(root, 1);
 
        zero_end = 1;
        while(ac-- > 0) {
@@ -298,32 +339,13 @@ int main(int ac, char **av)
                btrfs_register_one_device(file);
        }
 
-       ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
-                               &chunk_start, &chunk_size,
-                               BTRFS_BLOCK_GROUP_METADATA |
-                               BTRFS_BLOCK_GROUP_RAID1);
-       BUG_ON(ret);
-       ret = btrfs_make_block_group(trans, root->fs_info->extent_root, 0,
-                                    BTRFS_BLOCK_GROUP_METADATA |
-                                    BTRFS_BLOCK_GROUP_RAID1,
-                                    BTRFS_CHUNK_TREE_OBJECTID,
-                                    chunk_start, chunk_size);
-       BUG_ON(ret);
-       ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
-                               &chunk_start, &chunk_size,
-                               BTRFS_BLOCK_GROUP_DATA |
-                               BTRFS_BLOCK_GROUP_RAID0);
-       BUG_ON(ret);
-       ret = btrfs_make_block_group(trans, root->fs_info->extent_root, 0,
-                                    BTRFS_BLOCK_GROUP_DATA |
-                                    BTRFS_BLOCK_GROUP_RAID0,
-                                    BTRFS_CHUNK_TREE_OBJECTID,
-                                    chunk_start, chunk_size);
-       BUG_ON(ret);
+raid_groups:
+       ret = create_raid_groups(trans, root, BTRFS_BLOCK_GROUP_RAID0,
+                                BTRFS_BLOCK_GROUP_RAID1 |
+                                BTRFS_BLOCK_GROUP_DUP);
        btrfs_commit_transaction(trans, root);
        ret = close_ctree(root);
        BUG_ON(ret);
-done:
        return 0;
 }
 
diff --git a/volumes.c b/volumes.c
index ef90872..b5f6b8b 100644 (file)
--- a/volumes.c
+++ b/volumes.c
@@ -570,6 +570,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
        struct map_lookup *map;
        u64 physical;
        u64 calc_size = 8 * 1024 * 1024;
+       u64 min_free = calc_size;
        u64 avail;
        u64 max_avail = 0;
        int num_stripes = 1;
@@ -579,22 +580,30 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
        int stripe_len = 64 * 1024;
        struct btrfs_key key;
 
-       if (list_empty(dev_list))
+       if (list_empty(dev_list)) {
                return -ENOSPC;
+       }
 
-       if (type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1)) {
+       if (type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
+                   BTRFS_BLOCK_GROUP_DUP)) {
                calc_size = 1024 * 1024 * 1024;
        }
-       if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
+       if (type & BTRFS_BLOCK_GROUP_RAID1) {
                num_stripes = min_t(u64, 2,
                                  btrfs_super_num_devices(&info->super_copy));
        }
+       if (type & BTRFS_BLOCK_GROUP_DUP)
+               num_stripes = 2;
        if (type & (BTRFS_BLOCK_GROUP_RAID0))
                num_stripes = btrfs_super_num_devices(&info->super_copy);
 again:
        INIT_LIST_HEAD(&private_devs);
        cur = dev_list->next;
        index = 0;
+
+       if (type & BTRFS_BLOCK_GROUP_DUP)
+               min_free = calc_size * 2;
+
        /* build a private list of devices we will allocate from */
        while(index < num_stripes) {
                device = list_entry(cur, struct btrfs_device, dev_list);
@@ -602,9 +611,11 @@ again:
                cur = cur->next;
                if (avail > max_avail)
                        max_avail = avail;
-               if (avail >= calc_size) {
+               if (avail >= min_free) {
                        list_move_tail(&device->dev_list, &private_devs);
                        index++;
+                       if (type & BTRFS_BLOCK_GROUP_DUP)
+                               index++;
                }
                if (cur == dev_list)
                        break;
@@ -635,23 +646,28 @@ again:
 
        stripes = &chunk->stripe;
 
-       if (type & BTRFS_BLOCK_GROUP_RAID1)
+       if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP))
                *num_bytes = calc_size;
        else
                *num_bytes = calc_size * num_stripes;
 
        index = 0;
+printk("new chunk type %Lu start %Lu size %Lu\n", type, key.objectid, *num_bytes);
        while(index < num_stripes) {
                BUG_ON(list_empty(&private_devs));
                cur = private_devs.next;
                device = list_entry(cur, struct btrfs_device, dev_list);
-               list_move_tail(&device->dev_list, dev_list);
+
+               /* loop over this device again if we're doing a dup group */
+               if (!(type & BTRFS_BLOCK_GROUP_DUP) ||
+                   (index == num_stripes - 1))
+                       list_move_tail(&device->dev_list, dev_list);
 
                ret = btrfs_alloc_dev_extent(trans, device,
                                             key.objectid,
                                             calc_size, &dev_offset);
                BUG_ON(ret);
-printk("alloc chunk size %llu from dev %llu\n",
+printk("\talloc chunk size %llu from dev %llu\n",
        (unsigned long long)calc_size,
        (unsigned long long)device->devid);
                device->bytes_used += calc_size;
@@ -744,6 +760,14 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
                        stripe_index = stripe_nr % map->num_stripes;
                        *total_devs = 1;
                }
+       } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
+               if (rw == WRITE) {
+                       *total_devs = map->num_stripes;
+                       stripe_index = dev_nr;
+               } else {
+                       stripe_index = 0;
+                       *total_devs = 1;
+               }
        } else {
                /*
                 * after this do_div call, stripe_nr is the number of stripes
@@ -757,7 +781,8 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
        *phys = map->stripes[stripe_index].physical + stripe_offset +
                stripe_nr * map->stripe_len;
 
-       if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1)) {
+       if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
+                        BTRFS_BLOCK_GROUP_DUP)) {
                /* we limit the length of each bio to what fits in a stripe */
                *length = min_t(u64, ce->size - offset,
                              map->stripe_len - stripe_offset);