Add mirroring support across multiple drives
author Chris Mason <chris.mason@oracle.com>
Thu, 3 Apr 2008 20:35:48 +0000 (16:35 -0400)
committer David Woodhouse <dwmw2@hera.kernel.org>
Thu, 3 Apr 2008 20:35:48 +0000 (16:35 -0400)
btrfsck.c
ctree.h
disk-io.c
extent-tree.c
kerncompat.h
mkfs.c
utils.c
volumes.c
volumes.h

diff --git a/btrfsck.c b/btrfsck.c
index aa2256e..9dc085f 100644 (file)
--- a/btrfsck.c
+++ b/btrfsck.c
@@ -480,7 +480,6 @@ static int pick_next_pending(struct cache_tree *pending,
        }
        return ret;
 }
-static struct extent_buffer reada_buf;
 
 static int run_next_block(struct btrfs_root *root,
                          struct block_info *bits,
@@ -503,7 +502,6 @@ static int run_next_block(struct btrfs_root *root,
        struct cache_extent *cache;
        int reada_bits;
 
-       u64 last_block = 0;
        ret = pick_next_pending(pending, reada, nodes, *last, bits,
                                bits_nr, &reada_bits);
        if (ret == 0) {
@@ -511,14 +509,9 @@ static int run_next_block(struct btrfs_root *root,
        }
        if (!reada_bits) {
                for(i = 0; i < ret; i++) {
-                       u64 offset;
                        insert_cache_extent(reada, bits[i].start,
                                            bits[i].size);
-                       btrfs_map_bh_to_logical(root, &reada_buf,
-                                               bits[i].start);
-                       offset = reada_buf.dev_bytenr;
-                       last_block = bits[i].start;
-                       readahead(reada_buf.fd, offset, bits[i].size);
+                       readahead_tree_block(root, bits[i].start, bits[i].size);
                }
        }
        *last = bits[0].start;
diff --git a/ctree.h b/ctree.h
index 1e4c84a..b90a567 100644 (file)
--- a/ctree.h
+++ b/ctree.h
@@ -406,6 +406,7 @@ struct btrfs_csum_item {
 #define BTRFS_BLOCK_GROUP_SYSTEM   (1 << 1)
 #define BTRFS_BLOCK_GROUP_METADATA (1 << 2)
 #define BTRFS_BLOCK_GROUP_RAID0    (1 << 3)
+#define BTRFS_BLOCK_GROUP_RAID1    (1 << 4)
 
 struct btrfs_block_group_item {
        __le64 used;
diff --git a/disk-io.c b/disk-io.c
index 1215394..204abe0 100644 (file)
--- a/disk-io.c
+++ b/disk-io.c
@@ -18,6 +18,7 @@
 
 #define _XOPEN_SOURCE 600
 #define __USE_XOPEN2K
+#define _GNU_SOURCE 1
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/types.h>
 #include "crc32c.h"
 #include "utils.h"
 
-int btrfs_map_bh_to_logical(struct btrfs_root *root, struct extent_buffer *buf,
-                           u64 logical)
-{
-       u64 physical;
-       u64 length;
-       struct btrfs_device *device;
-       int ret;
-
-       ret = btrfs_map_block(&root->fs_info->mapping_tree, logical, &physical,
-                             &length, &device);
-       BUG_ON(ret);
-       buf->fd = device->fd;
-       buf->dev_bytenr = physical;
-       return 0;
-}
-
 static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
 {
        if (buf->start != btrfs_header_bytenr(buf))
@@ -110,6 +95,28 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
 
 int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
 {
+       int ret;
+       int total_devs = 1;
+       int dev_nr;
+       struct extent_buffer *eb;
+       u64 physical;
+       u64 length;
+       struct btrfs_device *device;
+
+       eb = btrfs_find_tree_block(root, bytenr, blocksize);
+       if (eb && btrfs_buffer_uptodate(eb)) {
+               free_extent_buffer(eb);
+               return 0;
+       }
+
+       dev_nr = 0;
+       ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, dev_nr,
+                             bytenr, &physical, &length, &device,
+                             &total_devs);
+       BUG_ON(ret);
+       device->total_ios++;
+       blocksize = min(blocksize, (u32)(64 * 1024));
+       readahead(device->fd, physical, blocksize);
        return 0;
 }
 
@@ -117,35 +124,69 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
                                     u32 blocksize)
 {
        int ret;
+       int total_devs = 1;
+       int dev_nr;
        struct extent_buffer *eb;
+       u64 physical;
+       u64 length;
+       struct btrfs_device *device;
 
        eb = btrfs_find_create_tree_block(root, bytenr, blocksize);
        if (!eb)
                return NULL;
-       if (!btrfs_buffer_uptodate(eb)) {
-               btrfs_map_bh_to_logical(root, eb, eb->start);
-               ret = read_extent_from_disk(eb);
-               if (ret) {
-                       free_extent_buffer(eb);
-                       return NULL;
-               }
-               btrfs_set_buffer_uptodate(eb);
+
+       if (btrfs_buffer_uptodate(eb))
+               return eb;
+
+       dev_nr = 0;
+       ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, dev_nr,
+                             eb->start, &physical, &length, &device,
+                             &total_devs);
+       BUG_ON(ret);
+       eb->fd = device->fd;
+       device->total_ios++;
+       eb->dev_bytenr = physical;
+       ret = read_extent_from_disk(eb);
+       if (ret) {
+               free_extent_buffer(eb);
+               return NULL;
        }
+       btrfs_set_buffer_uptodate(eb);
        return eb;
 }
 
 int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                     struct extent_buffer *eb)
 {
+       int ret;
+       int total_devs = 1;
+       int dev_nr;
+       u64 physical;
+       u64 length;
+       struct btrfs_device *device;
+
        if (check_tree_block(root, eb))
                BUG();
        if (!btrfs_buffer_uptodate(eb))
                BUG();
 
        btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
-       btrfs_map_bh_to_logical(root, eb, eb->start);
        csum_tree_block(root, eb, 0);
-       return write_extent_to_disk(eb);
+
+       dev_nr = 0;
+       while(dev_nr < total_devs) {
+               ret = btrfs_map_block(&root->fs_info->mapping_tree, WRITE,
+                                     dev_nr, eb->start, &physical, &length,
+                                     &device, &total_devs);
+               BUG_ON(ret);
+               eb->fd = device->fd;
+               eb->dev_bytenr = physical;
+               dev_nr++;
+               device->total_ios++;
+               ret = write_extent_to_disk(eb);
+               BUG_ON(ret);
+       }
+       return 0;
 }
 
 static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
diff --git a/extent-tree.c b/extent-tree.c
index b9cf92f..d845ada 100644 (file)
--- a/extent-tree.c
+++ b/extent-tree.c
@@ -1620,8 +1620,12 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
        struct btrfs_key keys[2];
        int extra_alloc_flags = 0;
 
-       if (btrfs_super_num_devices(&info->super_copy) > 1)
-               extra_alloc_flags = BTRFS_BLOCK_GROUP_RAID0;
+       if (0 && btrfs_super_num_devices(&info->super_copy) > 1) {
+               if (data)
+                       extra_alloc_flags = BTRFS_BLOCK_GROUP_RAID0;
+               else
+                       extra_alloc_flags = BTRFS_BLOCK_GROUP_RAID1;
+       }
 
        if (data) {
                data = BTRFS_BLOCK_GROUP_DATA | extra_alloc_flags;
diff --git a/kerncompat.h b/kerncompat.h
index c7aee68..e6e9987 100644 (file)
--- a/kerncompat.h
+++ b/kerncompat.h
 #include <endian.h>
 #include <byteswap.h>
 
+#ifndef READ
+#define READ 0
+#define WRITE 1
+#define READA 2
+#endif
+
 #define gfp_t int
 #define get_cpu_var(p) (p)
 #define __get_cpu_var(p) (p)
diff --git a/mkfs.c b/mkfs.c
index aa05536..0d2ca38 100644 (file)
--- a/mkfs.c
+++ b/mkfs.c
@@ -163,6 +163,8 @@ int main(int ac, char **av)
        char *file;
        u64 block_count = 0;
        u64 dev_block_count = 0;
+       u64 chunk_start;
+       u64 chunk_size;
        int fd;
        int first_fd;
        int ret;
@@ -295,6 +297,29 @@ int main(int ac, char **av)
                close(fd);
                btrfs_register_one_device(file);
        }
+
+       ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
+                               &chunk_start, &chunk_size,
+                               BTRFS_BLOCK_GROUP_METADATA |
+                               BTRFS_BLOCK_GROUP_RAID1);
+       BUG_ON(ret);
+       ret = btrfs_make_block_group(trans, root->fs_info->extent_root, 0,
+                                    BTRFS_BLOCK_GROUP_METADATA |
+                                    BTRFS_BLOCK_GROUP_RAID1,
+                                    BTRFS_CHUNK_TREE_OBJECTID,
+                                    chunk_start, chunk_size);
+       BUG_ON(ret);
+       ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
+                               &chunk_start, &chunk_size,
+                               BTRFS_BLOCK_GROUP_DATA |
+                               BTRFS_BLOCK_GROUP_RAID0);
+       BUG_ON(ret);
+       ret = btrfs_make_block_group(trans, root->fs_info->extent_root, 0,
+                                    BTRFS_BLOCK_GROUP_DATA |
+                                    BTRFS_BLOCK_GROUP_RAID0,
+                                    BTRFS_CHUNK_TREE_OBJECTID,
+                                    chunk_start, chunk_size);
+       BUG_ON(ret);
        btrfs_commit_transaction(trans, root);
        ret = close_ctree(root);
        BUG_ON(ret);
diff --git a/utils.c b/utils.c
index 323b108..3361030 100644 (file)
--- a/utils.c
+++ b/utils.c
@@ -404,31 +404,40 @@ int btrfs_add_to_fsid(struct btrfs_trans_handle *trans,
 {
        struct btrfs_super_block *disk_super;
        struct btrfs_super_block *super = &root->fs_info->super_copy;
-       struct btrfs_device device;
+       struct btrfs_device *device;
        struct btrfs_dev_item *dev_item;
        char *buf;
        u64 total_bytes;
        u64 num_devs;
        int ret;
 
-       buf = malloc(sectorsize);
+       device = kmalloc(sizeof(*device), GFP_NOFS);
+       if (!device)
+               return -ENOMEM;
+       buf = kmalloc(sectorsize, GFP_NOFS);
+       if (!buf) {
+               kfree(device);
+               return -ENOMEM;
+       }
        BUG_ON(sizeof(*disk_super) > sectorsize);
        memset(buf, 0, sectorsize);
 
        disk_super = (struct btrfs_super_block *)buf;
        dev_item = &disk_super->dev_item;
 
-       uuid_generate(device.uuid);
-       device.devid = 0;
-       device.type = 0;
-       device.io_width = io_width;
-       device.io_align = io_align;
-       device.sector_size = sectorsize;
-       device.fd = 0;
-       device.total_bytes = block_count;
-       device.bytes_used = 0;
-
-       ret = btrfs_add_device(trans, root, &device);
+       uuid_generate(device->uuid);
+       device->devid = 0;
+       device->type = 0;
+       device->io_width = io_width;
+       device->io_align = io_align;
+       device->sector_size = sectorsize;
+       device->fd = 0;
+       device->total_bytes = block_count;
+       device->bytes_used = 0;
+       device->total_ios = 0;
+       device->dev_root = root->fs_info->dev_root;
+
+       ret = btrfs_add_device(trans, root, device);
        BUG_ON(ret);
 
        total_bytes = btrfs_super_total_bytes(super) + block_count;
@@ -439,20 +448,21 @@ int btrfs_add_to_fsid(struct btrfs_trans_handle *trans,
 
        memcpy(disk_super, super, sizeof(*disk_super));
 
-       printf("adding device id %llu\n", (unsigned long long)device.devid);
-       btrfs_set_stack_device_id(dev_item, device.devid);
-       btrfs_set_stack_device_type(dev_item, device.type);
-       btrfs_set_stack_device_io_align(dev_item, device.io_align);
-       btrfs_set_stack_device_io_width(dev_item, device.io_width);
-       btrfs_set_stack_device_sector_size(dev_item, device.sector_size);
-       btrfs_set_stack_device_total_bytes(dev_item, device.total_bytes);
-       btrfs_set_stack_device_bytes_used(dev_item, device.bytes_used);
-       memcpy(&dev_item->uuid, device.uuid, BTRFS_DEV_UUID_SIZE);
+       printf("adding device id %llu\n", (unsigned long long)device->devid);
+       btrfs_set_stack_device_id(dev_item, device->devid);
+       btrfs_set_stack_device_type(dev_item, device->type);
+       btrfs_set_stack_device_io_align(dev_item, device->io_align);
+       btrfs_set_stack_device_io_width(dev_item, device->io_width);
+       btrfs_set_stack_device_sector_size(dev_item, device->sector_size);
+       btrfs_set_stack_device_total_bytes(dev_item, device->total_bytes);
+       btrfs_set_stack_device_bytes_used(dev_item, device->bytes_used);
+       memcpy(&dev_item->uuid, device->uuid, BTRFS_DEV_UUID_SIZE);
 
        ret = pwrite(fd, buf, sectorsize, BTRFS_SUPER_INFO_OFFSET);
        BUG_ON(ret != sectorsize);
 
-       free(buf);
+       kfree(buf);
+       list_add(&device->dev_list, &root->fs_info->fs_devices->devices);
        return 0;
 }
 
diff --git a/volumes.c b/volumes.c
index d3ee38b..ef90872 100644 (file)
--- a/volumes.c
+++ b/volumes.c
@@ -232,6 +232,10 @@ static int find_free_dev_extent(struct btrfs_trans_handle *trans,
 
        /* FIXME use last free of some kind */
 
+       /* we don't want to overwrite the superblock on the drive,
+        * so we make sure to start at an offset of at least 1MB
+        */
+       search_start = max((u64)1024 * 1024, search_start);
        key.objectid = device->devid;
        key.offset = search_start;
        key.type = BTRFS_DEV_EXTENT_KEY;
@@ -578,12 +582,15 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
        if (list_empty(dev_list))
                return -ENOSPC;
 
-       if (type & BTRFS_BLOCK_GROUP_RAID0)
+       if (type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1)) {
+               calc_size = 1024 * 1024 * 1024;
+       }
+       if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
+               num_stripes = min_t(u64, 2,
+                                 btrfs_super_num_devices(&info->super_copy));
+       }
+       if (type & (BTRFS_BLOCK_GROUP_RAID0))
                num_stripes = btrfs_super_num_devices(&info->super_copy);
-       if (type & BTRFS_BLOCK_GROUP_DATA)
-               stripe_len = 64 * 1024;
-       if (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM))
-               stripe_len = 32 * 1024;
 again:
        INIT_LIST_HEAD(&private_devs);
        cur = dev_list->next;
@@ -628,7 +635,11 @@ again:
 
        stripes = &chunk->stripe;
 
-       *num_bytes = calc_size * num_stripes;
+       if (type & BTRFS_BLOCK_GROUP_RAID1)
+               *num_bytes = calc_size;
+       else
+               *num_bytes = calc_size * num_stripes;
+
        index = 0;
        while(index < num_stripes) {
                BUG_ON(list_empty(&private_devs));
@@ -695,9 +706,9 @@ void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
        cache_tree_init(&tree->cache_tree);
 }
 
-int btrfs_map_block(struct btrfs_mapping_tree *map_tree,
-                   u64 logical, u64 *phys, u64 *length,
-                   struct btrfs_device **dev)
+int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
+                   int dev_nr, u64 logical, u64 *phys, u64 *length,
+                   struct btrfs_device **dev, int *total_devs)
 {
        struct cache_extent *ce;
        struct map_lookup *map;
@@ -725,20 +736,28 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree,
        /* stripe_offset is the offset of this block in its stripe*/
        stripe_offset = offset - stripe_offset;
 
-       /*
-        * after this do_div call, stripe_nr is the number of stripes
-        * on this device we have to walk to find the data, and
-        * stripe_index is the number of our device in the stripe array
-        */
-       stripe_index = stripe_nr % map->num_stripes;
-       stripe_nr = stripe_nr / map->num_stripes;
-
+       if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
+               stripe_index = dev_nr;
+               if (rw == WRITE)
+                       *total_devs = map->num_stripes;
+               else {
+                       stripe_index = stripe_nr % map->num_stripes;
+                       *total_devs = 1;
+               }
+       } else {
+               /*
+                * after this do_div call, stripe_nr is the number of stripes
+                * on this device we have to walk to find the data, and
+                * stripe_index is the number of our device in the stripe array
+                */
+               stripe_index = stripe_nr % map->num_stripes;
+               stripe_nr = stripe_nr / map->num_stripes;
+       }
        BUG_ON(stripe_index >= map->num_stripes);
-
        *phys = map->stripes[stripe_index].physical + stripe_offset +
                stripe_nr * map->stripe_len;
 
-       if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
+       if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1)) {
                /* we limit the length of each bio to what fits in a stripe */
                *length = min_t(u64, ce->size - offset,
                              map->stripe_len - stripe_offset);
@@ -846,6 +865,7 @@ static int read_one_dev(struct btrfs_root *root,
                device = kmalloc(sizeof(*device), GFP_NOFS);
                if (!device)
                        return -ENOMEM;
+               device->total_ios = 0;
                list_add(&device->dev_list,
                         &root->fs_info->fs_devices->devices);
        }
diff --git a/volumes.h b/volumes.h
index ae9e7bb..a51cfe5 100644 (file)
--- a/volumes.h
+++ b/volumes.h
@@ -22,6 +22,8 @@ struct btrfs_device {
        struct list_head dev_list;
        struct btrfs_root *dev_root;
 
+       u64 total_ios;
+
        int fd;
 
        char *name;
@@ -67,9 +69,9 @@ struct btrfs_fs_devices {
 int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
                           struct btrfs_device *device,
                           u64 owner, u64 num_bytes, u64 *start);
-int btrfs_map_block(struct btrfs_mapping_tree *map_tree,
+int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, int stripe_nr,
                    u64 logical, u64 *phys, u64 *length,
-                   struct btrfs_device **dev);
+                   struct btrfs_device **dev, int *total_stripes);
 int btrfs_read_sys_array(struct btrfs_root *root);
 int btrfs_read_chunk_tree(struct btrfs_root *root);
 int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,