Add support for multiple devices per filesystem
author Chris Mason <chris.mason@oracle.com>
Mon, 24 Mar 2008 19:03:18 +0000 (15:03 -0400)
committer David Woodhouse <dwmw2@hera.kernel.org>
Mon, 24 Mar 2008 19:03:18 +0000 (15:03 -0400)
16 files changed:
Makefile
convert.c
ctree.c
ctree.h
debug-tree.c
disk-io.c
disk-io.h
extent-cache.c
extent-tree.c
extent_io.c
mkfs.c
print-tree.c
utils.c
utils.h
volumes.c [new file with mode: 0644]
volumes.h [new file with mode: 0644]

index 4cabeef5f8a35d93bf11b92b3cf6a34025597caf..ff7ddadab81a5cc91d5f365d2feff7e84964af3f 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -4,6 +4,7 @@ CFLAGS = -g -Werror
 objects = ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \
          root-tree.o dir-item.o hash.o file-item.o inode-item.o \
          inode-map.o crc32c.o rbtree.o extent-cache.o extent_io.o \
+         volumes.o
 #
 CHECKFLAGS=-D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise \
                -Wuninitialized -Wshadow -Wundef
index d407bb81c42c0c858bc8843fbbd363a312fd45a0..83928f4be5901968d7e01548b08dd70ac95b73ba 100644 (file)
--- a/convert.c
+++ b/convert.c
@@ -341,7 +341,7 @@ static int record_file_extent(struct btrfs_trans_handle *trans,
                if (ret)
                        goto fail;
                ret = btrfs_update_block_group(trans, root, disk_bytenr,
-                                              num_bytes, 1, 0, 1);
+                                              num_bytes, 1, 0);
        } else if (ret == -EEXIST) {
                ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes,
                                           root->root_key.objectid,
@@ -1432,7 +1432,7 @@ static int fixup_block_accounting(struct btrfs_trans_handle *trans,
                if (key.type == BTRFS_EXTENT_ITEM_KEY) {
                        bytes_used += key.offset;
                        ret = btrfs_update_block_group(trans, root,
-                                 key.objectid, key.offset, 1, 0, 1);
+                                 key.objectid, key.offset, 1, 0);
                        BUG_ON(ret);
                }
                path.slots[0]++;
@@ -1547,7 +1547,7 @@ int do_convert(const char *devname, int datacsum, int packing, int noxattr)
 {
        int i, fd, ret;
        u32 blocksize;
-       u64 blocks[4];
+       u64 blocks[6];
        u64 total_bytes;
        u64 super_bytenr;
        ext2_filsys ext2_fs;
@@ -1656,7 +1656,7 @@ int do_rollback(const char *devname, int force)
        struct btrfs_dir_item *dir;
        struct btrfs_inode_item *inode;
        struct btrfs_file_extent_item *fi;
-       struct btrfs_inode_timespec *tspec;
+       struct btrfs_timespec *tspec;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        struct btrfs_path path;
diff --git a/ctree.c b/ctree.c
index 0e2fe7f6f2077b6fdf026a317c524236f536ae09..88ebd9ec0c13ba2735c4959af1d77654b7bb6d65 100644 (file)
--- a/ctree.c
+++ b/ctree.c
@@ -68,6 +68,14 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
        memset(p, 0, sizeof(*p));
 }
 
+/*
+ * Queue a root on fs_info->dirty_cowonly_roots.  Nothing happens unless
+ * the root has track_dirty set and its dirty_list entry is currently
+ * empty, so a root is never linked onto the list twice.
+ */
+static void add_root_to_dirty_list(struct btrfs_root *root)
+{
+       if (!root->track_dirty)
+               return;
+       if (!list_empty(&root->dirty_list))
+               return;
+
+       list_add(&root->dirty_list, &root->fs_info->dirty_cowonly_roots);
+}
+
 int btrfs_copy_root(struct btrfs_trans_handle *trans,
                      struct btrfs_root *root,
                      struct extent_buffer *buf,
@@ -194,6 +202,7 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans,
                                          root_gen, 0, 0, 1);
                }
                free_extent_buffer(buf);
+               add_root_to_dirty_list(root);
        } else {
                root_gen = btrfs_header_generation(parent);
                btrfs_set_node_blockptr(parent, parent_slot,
@@ -237,7 +246,7 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans,
                return 0;
        }
 
-       search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1);
+       search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
        ret = __btrfs_cow_block(trans, root, buf, parent,
                                 parent_slot, cow_ret, search_start, 0);
        return ret;
@@ -690,6 +699,7 @@ static int balance_level(struct btrfs_trans_handle *trans,
                BUG_ON(ret);
 
                root->node = child;
+               add_root_to_dirty_list(root);
                path->nodes[level] = NULL;
                clean_tree_block(trans, root, mid);
                wait_on_tree_block_writeback(root, mid);
@@ -1338,6 +1348,7 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans,
        /* the super has an extra ref to root->node */
        free_extent_buffer(root->node);
        root->node = c;
+       add_root_to_dirty_list(root);
        extent_buffer_get(c);
        path->nodes[level] = c;
        path->slots[level] = 0;
@@ -2619,3 +2630,28 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
        }
        return 0;
 }
+
+/*
+ * Walk backwards from the current path position looking for an item whose
+ * key type equals 'type'.
+ *
+ * The walk gives up once it steps onto a key whose objectid is below
+ * min_objectid, or whose objectid equals min_objectid with a type below
+ * 'type'.
+ *
+ * Returns 0 with path->slots[0] on the matching item, 1 if the walk
+ * stopped at the lower bound without a match, or whatever non-zero value
+ * btrfs_prev_leaf() returned when it could not step to an earlier leaf.
+ */
+int btrfs_previous_item(struct btrfs_root *root,
+                       struct btrfs_path *path, u64 min_objectid,
+                       int type)
+{
+       struct btrfs_key found_key;
+       struct extent_buffer *leaf;
+       int ret;
+
+       while(1) {
+               if (path->slots[0] == 0) {
+                       ret = btrfs_prev_leaf(root, path);
+                       if (ret != 0)
+                               return ret;
+               } else {
+                       path->slots[0]--;
+               }
+               leaf = path->nodes[0];
+               btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+               /*
+                * NOTE(review): the two bound checks assume items are
+                * sorted by (objectid, type, ...), so earlier slots can
+                * only hold smaller keys.
+                */
+               if (found_key.objectid < min_objectid)
+                       break;
+               if (found_key.type == type)
+                       return 0;
+               if (found_key.objectid == min_objectid &&
+                   found_key.type < type)
+                       break;
+       }
+       return 1;
+}
+
diff --git a/ctree.h b/ctree.h
index 113d38f5586df4bc2e5249e1e8b61410e9b504f3..c8585147cd6986a196cb31e7e3d5edfa2c1b2461 100644 (file)
--- a/ctree.h
+++ b/ctree.h
@@ -30,12 +30,44 @@ struct btrfs_trans_handle;
 #define BTRFS_MAGIC "_B4RfS_M"
 
 #define BTRFS_MAX_LEVEL 8
+
+/* holds pointers to all of the tree roots */
 #define BTRFS_ROOT_TREE_OBJECTID 1ULL
+
+/* stores information about which extents are in use, and reference counts */
 #define BTRFS_EXTENT_TREE_OBJECTID 2ULL
+
+/* one per subvolume, storing files and directories */
 #define BTRFS_FS_TREE_OBJECTID 3ULL
+
+/* directory objectid inside the root tree */
 #define BTRFS_ROOT_TREE_DIR_OBJECTID 4ULL
+
+
+/*
+ * chunk tree stores translations from logical -> physical block numbering
+ * the super block points to the chunk tree
+ */
+#define BTRFS_CHUNK_TREE_OBJECTID 5ULL
+
+/*
+ * stores information about which areas of a given device are in use.
+ * one per device.  The tree of tree roots points to the device tree
+ */
+#define BTRFS_DEV_TREE_OBJECTID 6ULL
+
+/*
+ * All files have objectids higher than this.
+ */
 #define BTRFS_FIRST_FREE_OBJECTID 256ULL
 
+
+/*
+ * the device items go into the chunk tree.  The key is in the form
+ * [ 1 BTRFS_DEV_ITEM_KEY device_id ]
+ */
+#define BTRFS_DEV_ITEMS_OBJECTID 1ULL
+
 /*
  * we can actually store much bigger names, but lets not confuse the rest
  * of linux
@@ -85,6 +117,81 @@ struct btrfs_key {
        u64 offset;
 } __attribute__ ((__packed__));
 
+struct btrfs_mapping_tree {
+       struct cache_tree cache_tree;   /* set up with cache_tree_init() when the fs is opened */
+};
+
+#define BTRFS_DEV_UUID_SIZE 16
+struct btrfs_dev_item {
+       /* the internal btrfs device id */
+       __le64 devid;
+
+       /* size of the device */
+       __le64 total_bytes;
+
+       /* bytes used */
+       __le64 bytes_used;
+
+       /* optimal io alignment for this device */
+       __le32 io_align;
+
+       /* optimal io width for this device */
+       __le32 io_width;
+
+       /* minimal io size for this device */
+       __le32 sector_size;
+
+       /* the kernel device number */
+       __le64 rdev;
+
+       /* type and info about this device */
+       __le64 type;
+
+       /* partition number, 0 for whole dev */
+       __le32 partition;
+
+       /* length of the name data at the end of the item */
+       __le16 name_len;
+
+       /* physical drive uuid (or lvm uuid) */
+       u8 uuid[BTRFS_DEV_UUID_SIZE];
+       /* the name bytes start right after this struct (see btrfs_device_name) */
+} __attribute__ ((__packed__));
+
+struct btrfs_stripe {
+       __le64 devid;   /* presumably the btrfs_dev_item devid holding this stripe — confirm */
+       __le64 offset;  /* presumably the stripe's start offset on that device — confirm */
+} __attribute__ ((__packed__));
+
+struct btrfs_chunk {
+       __le64 owner;   /* NOTE(review): meaning not visible here — confirm against volumes.c */
+       __le64 stripe_len;
+       __le64 type;    /* presumably BTRFS_BLOCK_GROUP_* bits — confirm */
+
+       /* optimal io alignment for this chunk */
+       __le32 io_align;
+
+       /* optimal io width for this chunk */
+       __le32 io_width;
+
+       /* minimal io size for this chunk */
+       __le32 sector_size;
+
+       /* 2^16 stripes is quite a lot, a second limit is the size of a single
+        * item in the btree
+        */
+       __le16 num_stripes;
+       struct btrfs_stripe stripe;     /* stripe 0; btrfs_chunk_item_size() counts it as part of sizeof */
+       /* additional stripes go here */
+} __attribute__ ((__packed__));
+
+/*
+ * Bytes needed for a chunk item that carries num_stripes stripes.
+ * struct btrfs_chunk already embeds the first stripe, so only the
+ * remaining num_stripes - 1 are added on top.  num_stripes == 0 trips
+ * BUG_ON.
+ */
+static inline unsigned long btrfs_chunk_item_size(int num_stripes)
+{
+       unsigned long size = sizeof(struct btrfs_chunk);
+
+       BUG_ON(num_stripes == 0);
+       size += sizeof(struct btrfs_stripe) * (num_stripes - 1);
+       return size;
+}
+
 #define BTRFS_FSID_SIZE 16
 /*
  * every tree block (leaf or node) starts with this header.
@@ -108,6 +215,13 @@ struct btrfs_header {
 #define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
                                        sizeof(struct btrfs_item) - \
                                        sizeof(struct btrfs_file_extent_item))
+
+/*
+ * this is a very generous portion of the super block, giving us
+ * room to translate 14 chunks with 3 stripes each.
+ */
+#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048
+
 /*
  * the super block basically lists the main trees of the FS
  * it currently lacks any block count etc etc
@@ -120,6 +234,7 @@ struct btrfs_super_block {
        __le64 magic;
        __le64 generation;
        __le64 root;
+       __le64 chunk_root;
        __le64 total_bytes;
        __le64 bytes_used;
        __le64 root_dir_objectid;
@@ -127,7 +242,10 @@ struct btrfs_super_block {
        __le32 nodesize;
        __le32 leafsize;
        __le32 stripesize;
+       __le32 sys_chunk_array_size;
        u8 root_level;
+       u8 chunk_root_level;
+       u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
 } __attribute__ ((__packed__));
 
 /*
@@ -197,12 +315,22 @@ struct btrfs_extent_ref {
        __le64 offset;
 } __attribute__ ((__packed__));
 
+/* dev extents record free space on individual devices.  The owner
+ * field points back to the chunk allocation mapping tree that allocated
+ * the extent
+ */
+struct btrfs_dev_extent {
+       __le64 owner;
+       __le64 length;  /* presumably the extent length in bytes — confirm */
+} __attribute__ ((__packed__));
+
+
 struct btrfs_inode_ref {
        __le16 name_len;
        /* name goes here */
 } __attribute__ ((__packed__));
 
-struct btrfs_inode_timespec {
+struct btrfs_timespec {
        __le64 sec;
        __le32 nsec;
 } __attribute__ ((__packed__));
@@ -220,13 +348,13 @@ struct btrfs_inode_item {
        __le32 uid;
        __le32 gid;
        __le32 mode;
-       __le32 rdev;
+       __le64 rdev;
        __le16 flags;
        __le16 compat_flags;
-       struct btrfs_inode_timespec atime;
-       struct btrfs_inode_timespec ctime;
-       struct btrfs_inode_timespec mtime;
-       struct btrfs_inode_timespec otime;
+       struct btrfs_timespec atime;
+       struct btrfs_timespec ctime;
+       struct btrfs_timespec mtime;
+       struct btrfs_timespec otime;
 } __attribute__ ((__packed__));
 
 struct btrfs_dir_item {
@@ -280,24 +408,26 @@ struct btrfs_csum_item {
 } __attribute__ ((__packed__));
 
 /* tag for the radix tree of block groups in ram */
-#define BTRFS_BLOCK_GROUP_SIZE (256 * 1024 * 1024)
-
-#define BTRFS_BLOCK_GROUP_DATA 1
-#define BTRFS_BLOCK_GROUP_MIXED 2
+#define BTRFS_BLOCK_GROUP_DATA     (1 << 0)
+#define BTRFS_BLOCK_GROUP_SYSTEM   (1 << 1)
+#define BTRFS_BLOCK_GROUP_METADATA (1 << 2)
 
 struct btrfs_block_group_item {
        __le64 used;
-       u8 flags;
+       __le64 chunk_tree;
+       __le64 chunk_objectid;
+       __le64 flags;
 } __attribute__ ((__packed__));
 
 struct btrfs_block_group_cache {
        struct cache_extent cache;
        struct btrfs_key key;
        struct btrfs_block_group_item item;
-       int data;
-       int cached;
        u64 pinned;
+       u64 flags;
+       int cached;
 };
+
 struct btrfs_extent_ops {
        int (*alloc_extent)(struct btrfs_root *root, u64 num_bytes,
                           u64 hint_byte, struct btrfs_key *ins);
@@ -305,11 +435,14 @@ struct btrfs_extent_ops {
                          u64 num_bytes);
 };
 
+struct btrfs_device;
 struct btrfs_fs_info {
        u8 fsid[BTRFS_FSID_SIZE];
        struct btrfs_root *fs_root;
        struct btrfs_root *extent_root;
        struct btrfs_root *tree_root;
+       struct btrfs_root *chunk_root;
+       struct btrfs_root *dev_root;
 
        struct extent_io_tree extent_cache;
        struct extent_io_tree free_space_cache;
@@ -318,18 +451,27 @@ struct btrfs_fs_info {
        struct extent_io_tree pending_del;
        struct extent_io_tree extent_ins;
 
+       /* logical->physical extent mapping */
+       struct btrfs_mapping_tree mapping_tree;
+
        u64 generation;
        u64 last_trans_committed;
        struct btrfs_trans_handle *running_transaction;
        struct btrfs_super_block super_copy;
        struct extent_buffer *sb_buffer;
        struct mutex fs_mutex;
-       int fp;
        u64 total_pinned;
 
        struct btrfs_extent_ops *extent_ops;
+       struct list_head dirty_cowonly_roots;
+
+       struct list_head devices;
+       struct list_head *last_device;
+       int fp;
+       int force_system_allocs;
        void *priv_data;
 };
+
 /*
  * in ram representation of the tree.  extent_root is used for all allocations
  * and for the extent tree extent_root root.
@@ -356,10 +498,15 @@ struct btrfs_root {
        u32 stripesize;
 
        int ref_cows;
+       int track_dirty;
+
 
        u32 type;
        u64 highest_inode;
        u64 last_inode_alloc;
+
+       /* the dirty list is only used by non-reference counted roots */
+       struct list_head dirty_list;
 };
 
 /*
@@ -408,6 +555,10 @@ struct btrfs_root {
  */
 #define BTRFS_BLOCK_GROUP_ITEM_KEY 50
 
+#define BTRFS_DEV_EXTENT_KEY   75
+#define BTRFS_DEV_ITEM_KEY     76
+#define BTRFS_CHUNK_ITEM_KEY   77
+
 /*
  * string items are for debugging.  They just store a short string of
  * data in the FS
@@ -471,11 +622,104 @@ static inline void btrfs_set_##name(type *s, u##bits val)                \
        s->member = cpu_to_le##bits(val);                               \
 }
 
+BTRFS_SETGET_FUNCS(device_type, struct btrfs_dev_item, type, 64);
+BTRFS_SETGET_FUNCS(device_total_bytes, struct btrfs_dev_item, total_bytes, 64);
+BTRFS_SETGET_FUNCS(device_bytes_used, struct btrfs_dev_item, bytes_used, 64);
+BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32);
+BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32);
+BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32);
+BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64);
+BTRFS_SETGET_FUNCS(device_rdev, struct btrfs_dev_item, rdev, 64);
+BTRFS_SETGET_FUNCS(device_partition, struct btrfs_dev_item, partition, 32);
+BTRFS_SETGET_FUNCS(device_name_len, struct btrfs_dev_item, name_len, 16);
+
+/* Address of the uuid[] field inside the given device item. */
+static inline char *btrfs_device_uuid(struct btrfs_dev_item *d)
+{
+       char *base = (char *)d;
+
+       return base + offsetof(struct btrfs_dev_item, uuid);
+}
+
+/*
+ * Address of the first byte past the fixed-size device item, which is
+ * where the struct's own comments say the name data lives.
+ */
+static inline char *btrfs_device_name(struct btrfs_dev_item *d)
+{
+       struct btrfs_dev_item *past = d + 1;
+
+       return (char *)past;
+}
+
+BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64);
+BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64);
+BTRFS_SETGET_FUNCS(chunk_io_align, struct btrfs_chunk, io_align, 32);
+BTRFS_SETGET_FUNCS(chunk_io_width, struct btrfs_chunk, io_width, 32);
+BTRFS_SETGET_FUNCS(chunk_sector_size, struct btrfs_chunk, sector_size, 32);
+BTRFS_SETGET_FUNCS(chunk_type, struct btrfs_chunk, type, 64);
+BTRFS_SETGET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16);
+BTRFS_SETGET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64);
+BTRFS_SETGET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64);
+
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_owner, struct btrfs_chunk, owner, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_stripe_len, struct btrfs_chunk,
+                        stripe_len, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_align, struct btrfs_chunk,
+                        io_align, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_width, struct btrfs_chunk,
+                        io_width, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_sector_size, struct btrfs_chunk,
+                        sector_size, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_type, struct btrfs_chunk, type, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_num_stripes, struct btrfs_chunk,
+                        num_stripes, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_stripe_devid, struct btrfs_stripe, devid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_stripe_offset, struct btrfs_stripe, offset, 64);
+
+/*
+ * Address of stripe number nr of a chunk: the embedded 'stripe' field is
+ * number 0 and each later one sits sizeof(struct btrfs_stripe) further on.
+ */
+static inline struct btrfs_stripe *btrfs_stripe_nr(struct btrfs_chunk *c,
+                                                  int nr)
+{
+       unsigned long first = (unsigned long)c +
+                             offsetof(struct btrfs_chunk, stripe);
+       unsigned long skip = nr * sizeof(struct btrfs_stripe);
+
+       return (struct btrfs_stripe *)(first + skip);
+}
+
+/* Look up stripe nr of chunk c and read its offset with btrfs_stripe_offset(). */
+static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb,
+                                        struct btrfs_chunk *c, int nr)
+{
+       struct btrfs_stripe *stripe = btrfs_stripe_nr(c, nr);
+
+       return btrfs_stripe_offset(eb, stripe);
+}
+
+/* Look up stripe nr of chunk c and store val with btrfs_set_stripe_offset(). */
+static inline void btrfs_set_stripe_offset_nr(struct extent_buffer *eb,
+                                            struct btrfs_chunk *c, int nr,
+                                            u64 val)
+{
+       struct btrfs_stripe *stripe = btrfs_stripe_nr(c, nr);
+
+       btrfs_set_stripe_offset(eb, stripe, val);
+}
+
+/* Look up stripe nr of chunk c and read its devid with btrfs_stripe_devid(). */
+static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb,
+                                        struct btrfs_chunk *c, int nr)
+{
+       struct btrfs_stripe *stripe = btrfs_stripe_nr(c, nr);
+
+       return btrfs_stripe_devid(eb, stripe);
+}
+
+/* Look up stripe nr of chunk c and store val with btrfs_set_stripe_devid(). */
+static inline void btrfs_set_stripe_devid_nr(struct extent_buffer *eb,
+                                            struct btrfs_chunk *c, int nr,
+                                            u64 val)
+{
+       struct btrfs_stripe *stripe = btrfs_stripe_nr(c, nr);
+
+       btrfs_set_stripe_devid(eb, stripe, val);
+}
+
 /* struct btrfs_block_group_item */
 BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item,
                         used, 64);
 BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item,
                         used, 64);
+BTRFS_SETGET_STACK_FUNCS(block_group_chunk_tree, struct btrfs_block_group_item,
+                        chunk_tree, 64);
+BTRFS_SETGET_FUNCS(disk_block_group_chunk_tree, struct btrfs_block_group_item,
+                        chunk_tree, 64);
+BTRFS_SETGET_STACK_FUNCS(block_group_chunk_objectid,
+                       struct btrfs_block_group_item, chunk_objectid, 64);
+/* misspelled accessor name, kept so existing callers keep building */
+BTRFS_SETGET_FUNCS(disk_block_group_chunk_objecitd,
+                  struct btrfs_block_group_item, chunk_objectid, 64);
+/* correctly spelled accessor for the same field; prefer this one */
+BTRFS_SETGET_FUNCS(disk_block_group_chunk_objectid,
+                  struct btrfs_block_group_item, chunk_objectid, 64);
+BTRFS_SETGET_FUNCS(disk_block_group_flags,
+                  struct btrfs_block_group_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(block_group_flags,
+                       struct btrfs_block_group_item, flags, 64);
 
 /* struct btrfs_inode_ref */
 BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16);
@@ -489,7 +733,7 @@ BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32);
 BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32);
 BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32);
 BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32);
-BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 32);
+BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64);
 BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 16);
 BTRFS_SETGET_FUNCS(inode_compat_flags, struct btrfs_inode_item,
                   compat_flags, 16);
@@ -511,51 +755,55 @@ BTRFS_SETGET_STACK_FUNCS(stack_inode_gid,
 BTRFS_SETGET_STACK_FUNCS(stack_inode_mode,
                         struct btrfs_inode_item, mode, 32);
 BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev,
-                        struct btrfs_inode_item, rdev, 32);
+                        struct btrfs_inode_item, rdev, 64);
 BTRFS_SETGET_STACK_FUNCS(stack_inode_flags,
                         struct btrfs_inode_item, flags, 16);
 BTRFS_SETGET_STACK_FUNCS(stack_inode_compat_flags,
                         struct btrfs_inode_item, compat_flags, 16);
 
-static inline struct btrfs_inode_timespec *
+static inline struct btrfs_timespec *
 btrfs_inode_atime(struct btrfs_inode_item *inode_item)
 {
        unsigned long ptr = (unsigned long)inode_item;
        ptr += offsetof(struct btrfs_inode_item, atime);
-       return (struct btrfs_inode_timespec *)ptr;
+       return (struct btrfs_timespec *)ptr;
 }
 
-static inline struct btrfs_inode_timespec *
+static inline struct btrfs_timespec *
 btrfs_inode_mtime(struct btrfs_inode_item *inode_item)
 {
        unsigned long ptr = (unsigned long)inode_item;
        ptr += offsetof(struct btrfs_inode_item, mtime);
-       return (struct btrfs_inode_timespec *)ptr;
+       return (struct btrfs_timespec *)ptr;
 }
 
-static inline struct btrfs_inode_timespec *
+static inline struct btrfs_timespec *
 btrfs_inode_ctime(struct btrfs_inode_item *inode_item)
 {
        unsigned long ptr = (unsigned long)inode_item;
        ptr += offsetof(struct btrfs_inode_item, ctime);
-       return (struct btrfs_inode_timespec *)ptr;
+       return (struct btrfs_timespec *)ptr;
 }
 
-static inline struct btrfs_inode_timespec *
+static inline struct btrfs_timespec *
 btrfs_inode_otime(struct btrfs_inode_item *inode_item)
 {
        unsigned long ptr = (unsigned long)inode_item;
        ptr += offsetof(struct btrfs_inode_item, otime);
-       return (struct btrfs_inode_timespec *)ptr;
+       return (struct btrfs_timespec *)ptr;
 }
 
-BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_inode_timespec, sec, 64);
-BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_inode_timespec, nsec, 32);
-BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_inode_timespec,
+BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64);
+BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec,
                         sec, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_inode_timespec,
+BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec,
                         nsec, 32);
 
+/* struct btrfs_dev_extent */
+BTRFS_SETGET_FUNCS(dev_extent_owner, struct btrfs_dev_extent, owner, 64);
+BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64);
+
 /* struct btrfs_extent_item */
 BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32);
 
@@ -830,8 +1078,14 @@ BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
 BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block,
                         generation, 64);
 BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_sys_array_size,
+                        struct btrfs_super_block, sys_chunk_array_size, 32);
 BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block,
                         root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block,
+                        chunk_root, 64);
+/* chunk_root_level is a u8 in struct btrfs_super_block, so the width is 8 */
+BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block,
+                        chunk_root_level, 8);
 BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block,
                         total_bytes, 64);
 BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block,
@@ -963,14 +1217,19 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                                    struct btrfs_root *root);
 int btrfs_free_block_groups(struct btrfs_fs_info *info);
 int btrfs_read_block_groups(struct btrfs_root *root);
-int btrfs_make_block_groups(struct btrfs_trans_handle *trans,
-                           struct btrfs_root *root);
+int btrfs_make_block_group(struct btrfs_trans_handle *trans,
+                          struct btrfs_root *root, u64 bytes_used,
+                          u64 type, u64 chunk_tree, u64 chunk_objectid,
+                          u64 size);
 u64 btrfs_hash_extent_ref(u64 root_objectid, u64 ref_generation,
                          u64 owner, u64 owner_offset);
 int btrfs_update_block_group(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root, u64 bytenr, u64 num,
-                            int alloc, int mark_free, int data);
+                            int alloc, int mark_free);
 /* ctree.c */
+int btrfs_previous_item(struct btrfs_root *root,
+                       struct btrfs_path *path, u64 min_objectid,
+                       int type);
 int btrfs_comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2);
 int btrfs_cow_block(struct btrfs_trans_handle *trans,
                    struct btrfs_root *root, struct extent_buffer *buf,
index 7a2d0d06535003c4c6b40237d7f4df46a57e59a1..acfa442da3344ee315cb23d100ed5acc4b81e177 100644 (file)
@@ -139,6 +139,10 @@ int main(int ac, char **av)
                printf("root tree\n");
                btrfs_print_tree(root->fs_info->tree_root,
                                 root->fs_info->tree_root->node);
+
+               printf("chunk tree\n");
+               btrfs_print_tree(root->fs_info->chunk_root,
+                                root->fs_info->chunk_root->node);
        }
        btrfs_init_path(&path);
        key.offset = 0;
@@ -170,7 +174,13 @@ int main(int ac, char **av)
                                              root->leafsize);
                        switch(found_key.objectid) {
                        case BTRFS_ROOT_TREE_OBJECTID:
-                               printf("root ");
+                               if (!skip)
+                                       printf("root ");
+                               break;
+                       case BTRFS_DEV_TREE_OBJECTID:
+                               if (!skip) {
+                                       printf("device extent tree ");
+                               }
                                break;
                        case BTRFS_EXTENT_TREE_OBJECTID:
                                skip = 0;
index 0957c633a0df39af75328ead97b90111f6cb455a..571cb09eb0460cab288335caf8e64f28d698c0c4 100644 (file)
--- a/disk-io.c
+++ b/disk-io.c
 #include "radix-tree.h"
 #include "ctree.h"
 #include "disk-io.h"
+#include "volumes.h"
 #include "transaction.h"
 #include "crc32c.h"
 
+int btrfs_open_device(struct btrfs_device *dev)
+{
+       dev->fd = open(dev->name, O_RDWR, 0600);        /* no O_CREAT, so open() does not use the 0600 mode */
+       BUG_ON(dev->fd < 0);    /* a failed open is fatal, so callers only ever see 0 */
+       return 0;
+}
+
 int btrfs_map_bh_to_logical(struct btrfs_root *root, struct extent_buffer *buf,
                            u64 logical)
 {
-       buf->fd = root->fs_info->fp;
-       buf->dev_bytenr = logical;
+       u64 physical;
+       u64 length;     /* handed to btrfs_map_block() but not read afterwards */
+       struct btrfs_device *device;
+       int ret;
+
+       ret = btrfs_map_block(&root->fs_info->mapping_tree, logical, &physical,
+                             &length, &device);
+       BUG_ON(ret);    /* any non-zero return from btrfs_map_block() is fatal */
+       buf->fd = device->fd;   /* use the mapped device's fd instead of fs_info->fp */
+       buf->dev_bytenr = physical;     /* and the mapped offset instead of the logical one */
        return 0;
 }
 
@@ -146,39 +162,56 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
        root->leafsize = leafsize;
        root->stripesize = stripesize;
        root->ref_cows = 0;
+       root->track_dirty = 0;
+
        root->fs_info = fs_info;
        root->objectid = objectid;
        root->last_trans = 0;
        root->highest_inode = 0;
        root->last_inode_alloc = 0;
+
+       INIT_LIST_HEAD(&root->dirty_list);
        memset(&root->root_key, 0, sizeof(root->root_key));
        memset(&root->root_item, 0, sizeof(root->root_item));
        root->root_key.objectid = objectid;
        return 0;
 }
 
-static int commit_tree_roots(struct btrfs_trans_handle *trans,
-                            struct btrfs_fs_info *fs_info)
+static int update_cowonly_root(struct btrfs_trans_handle *trans,
+                              struct btrfs_root *root)
 {
        int ret;
-       u64 old_extent_bytenr;
-       struct btrfs_root *tree_root = fs_info->tree_root;
-       struct btrfs_root *extent_root = fs_info->extent_root;
+       u64 old_root_bytenr;    /* bytenr currently recorded in root->root_item */
+       struct btrfs_root *tree_root = root->fs_info->tree_root;
 
-       btrfs_write_dirty_block_groups(trans, fs_info->extent_root);
+       btrfs_write_dirty_block_groups(trans, root);
        while(1) {
-               old_extent_bytenr = btrfs_root_bytenr(&extent_root->root_item);
-               if (old_extent_bytenr == extent_root->node->start)
+               old_root_bytenr = btrfs_root_bytenr(&root->root_item);
+               if (old_root_bytenr == root->node->start)       /* root item already names the current node */
                        break;
-               btrfs_set_root_bytenr(&extent_root->root_item,
-                                      extent_root->node->start);
-               extent_root->root_item.level =
-                       btrfs_header_level(extent_root->node);
+               btrfs_set_root_bytenr(&root->root_item,
+                                      root->node->start);
+               root->root_item.level = btrfs_header_level(root->node);
                ret = btrfs_update_root(trans, tree_root,
-                                       &extent_root->root_key,
-                                       &extent_root->root_item);
+                                       &root->root_key,
+                                       &root->root_item);
                BUG_ON(ret);
-               btrfs_write_dirty_block_groups(trans, fs_info->extent_root);
+               btrfs_write_dirty_block_groups(trans, root);    /* then loop to re-check root->node->start; presumably these writes can move it — confirm */
+       }
+       return 0;
+}
+
+static int commit_tree_roots(struct btrfs_trans_handle *trans,
+                            struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_root *root;
+       struct list_head *next;
+
+       while(!list_empty(&fs_info->dirty_cowonly_roots)) {     /* drain the dirty list one root at a time */
+               next = fs_info->dirty_cowonly_roots.next;
+               list_del_init(next);    /* unlink before updating; presumably lets add_root_to_dirty_list() queue it again — confirm */
+               root = list_entry(next, struct btrfs_root, dirty_list);
+               update_cowonly_root(trans, root);       /* return value ignored; it is always 0 */
        }
        return 0;
 }
@@ -384,6 +417,8 @@ struct btrfs_root *open_ctree_fd(int fp, u64 sb_bytenr)
        struct btrfs_root *root = malloc(sizeof(struct btrfs_root));
        struct btrfs_root *tree_root = malloc(sizeof(struct btrfs_root));
        struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root));
+       struct btrfs_root *chunk_root = malloc(sizeof(struct btrfs_root));
+       struct btrfs_root *dev_root = malloc(sizeof(struct btrfs_root));
        struct btrfs_fs_info *fs_info = malloc(sizeof(*fs_info));
        int ret;
        struct btrfs_super_block *disk_super;
@@ -398,6 +433,10 @@ struct btrfs_root *open_ctree_fd(int fp, u64 sb_bytenr)
        fs_info->extent_root = extent_root;
        fs_info->extent_ops = NULL;
        fs_info->priv_data = NULL;
+       fs_info->chunk_root = chunk_root;
+       fs_info->dev_root = dev_root;
+       fs_info->force_system_allocs = 0;
+
        extent_io_tree_init(&fs_info->extent_cache);
        extent_io_tree_init(&fs_info->free_space_cache);
        extent_io_tree_init(&fs_info->block_group_cache);
@@ -405,13 +444,25 @@ struct btrfs_root *open_ctree_fd(int fp, u64 sb_bytenr)
        extent_io_tree_init(&fs_info->pending_del);
        extent_io_tree_init(&fs_info->extent_ins);
 
+       cache_tree_init(&fs_info->mapping_tree.cache_tree);
+
        mutex_init(&fs_info->fs_mutex);
+       INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
+       INIT_LIST_HEAD(&fs_info->devices);
+       fs_info->last_device = &fs_info->devices;
 
-       __setup_root(512, 512, 512, 512, tree_root,
+       __setup_root(4096, 4096, 4096, 4096, tree_root,
                     fs_info, BTRFS_ROOT_TREE_OBJECTID);
 
-       fs_info->sb_buffer = read_tree_block(tree_root, sb_bytenr, 512);
+       fs_info->sb_buffer = btrfs_find_create_tree_block(tree_root, sb_bytenr,
+                                                         4096);
        BUG_ON(!fs_info->sb_buffer);
+       fs_info->sb_buffer->fd = fp;
+       fs_info->sb_buffer->dev_bytenr = sb_bytenr;
+       ret = read_extent_from_disk(fs_info->sb_buffer);
+       BUG_ON(ret);
+       btrfs_set_buffer_uptodate(fs_info->sb_buffer);
+
        read_extent_buffer(fs_info->sb_buffer, &fs_info->super_copy, 0,
                           sizeof(fs_info->super_copy));
        read_extent_buffer(fs_info->sb_buffer, fs_info->fsid,
@@ -433,8 +484,24 @@ struct btrfs_root *open_ctree_fd(int fp, u64 sb_bytenr)
        tree_root->sectorsize = sectorsize;
        tree_root->stripesize = stripesize;
 
+       ret = btrfs_read_sys_array(tree_root);
+       BUG_ON(ret);
+       blocksize = btrfs_level_size(tree_root,
+                                    btrfs_super_chunk_root_level(disk_super));
+
+       __setup_root(nodesize, leafsize, sectorsize, stripesize,
+                    chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID);
+       chunk_root->node = read_tree_block(chunk_root,
+                                          btrfs_super_chunk_root(disk_super),
+                                          blocksize);
+
+       BUG_ON(!chunk_root->node);
+       ret = btrfs_read_chunk_tree(chunk_root);
+       BUG_ON(ret);
+
        blocksize = btrfs_level_size(tree_root,
                                     btrfs_super_root_level(disk_super));
+
        tree_root->node = read_tree_block(tree_root,
                                          btrfs_super_root(disk_super),
                                          blocksize);
@@ -442,6 +509,13 @@ struct btrfs_root *open_ctree_fd(int fp, u64 sb_bytenr)
        ret = find_and_setup_root(tree_root, fs_info,
                                  BTRFS_EXTENT_TREE_OBJECTID, extent_root);
        BUG_ON(ret);
+       extent_root->track_dirty = 1;
+
+       ret = find_and_setup_root(tree_root, fs_info,
+                                 BTRFS_DEV_TREE_OBJECTID, dev_root);
+       BUG_ON(ret);
+       dev_root->track_dirty = 1;
+
        ret = find_and_setup_root(tree_root, fs_info,
                                  BTRFS_FS_TREE_OBJECTID, root);
        BUG_ON(ret);
@@ -456,12 +530,17 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
 {
        int ret;
        struct btrfs_root *tree_root = root->fs_info->tree_root;
+       struct btrfs_root *chunk_root = root->fs_info->chunk_root;
        btrfs_set_super_generation(&root->fs_info->super_copy,
                                   trans->transid);
        btrfs_set_super_root(&root->fs_info->super_copy,
                             tree_root->node->start);
        btrfs_set_super_root_level(&root->fs_info->super_copy,
                                   btrfs_header_level(tree_root->node));
+       btrfs_set_super_chunk_root(&root->fs_info->super_copy,
+                                  chunk_root->node->start);
+       btrfs_set_super_chunk_root_level(&root->fs_info->super_copy,
+                                        btrfs_header_level(chunk_root->node));
        write_extent_buffer(root->fs_info->sb_buffer,
                            &root->fs_info->super_copy, 0,
                            sizeof(root->fs_info->super_copy));
@@ -471,6 +550,24 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
        return ret;
 }
 
+/*
+ * Release every btrfs_device linked on fs_info->devices: each one is taken
+ * off the list, its name is freed, its file descriptor is closed and the
+ * device structure itself is freed.  Always returns 0.
+ */
+static int close_all_devices(struct btrfs_fs_info *fs_info)
+{
+       struct list_head *head = &fs_info->devices;
+       struct list_head *cur;
+       struct btrfs_device *dev;
+
+       while (!list_empty(head)) {
+               cur = head->next;
+               list_del(cur);
+
+               dev = list_entry(cur, struct btrfs_device, dev_list);
+               kfree(dev->name);
+               close(dev->fd);
+               kfree(dev);
+       }
+       return 0;
+}
+
 int close_ctree(struct btrfs_root *root)
 {
        int ret;
@@ -497,6 +594,13 @@ int close_ctree(struct btrfs_root *root)
        free_extent_buffer(root->commit_root);
        free_extent_buffer(root->fs_info->sb_buffer);
 
+       if (root->fs_info->chunk_root->node)
+               free_extent_buffer(root->fs_info->chunk_root->node);
+
+       if (root->fs_info->dev_root->node)
+               free_extent_buffer(root->fs_info->dev_root->node);
+
+       close_all_devices(root->fs_info);
        extent_io_tree_cleanup(&fs_info->extent_cache);
        extent_io_tree_cleanup(&fs_info->free_space_cache);
        extent_io_tree_cleanup(&fs_info->block_group_cache);
@@ -507,6 +611,8 @@ int close_ctree(struct btrfs_root *root)
        free(fs_info->tree_root);
        free(fs_info->extent_root);
        free(fs_info->fs_root);
+       free(fs_info->chunk_root);
+       free(fs_info->dev_root);
        free(fs_info);
 
        return 0;
index 6e56b2eb84090365628fb16ae579a203609d6c36..a457284165d0aa0ed7a3200555cf0c4407aba20e 100644 (file)
--- a/disk-io.h
+++ b/disk-io.h
@@ -20,6 +20,7 @@
 #define __DISKIO__
 
 #define BTRFS_SUPER_INFO_OFFSET (16 * 1024)
+struct btrfs_device;
 
 struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
                                      u32 blocksize);
@@ -50,4 +51,5 @@ void btrfs_csum_final(u32 crc, char *result);
 
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root);
+int btrfs_open_device(struct btrfs_device *dev);
 #endif
index 4e6d1b29ac71efadf30b015d425504dc0b0d4cf8..b871e18f18c789fd84f1fa37b92266584e926a4e 100644 (file)
@@ -96,10 +96,13 @@ int insert_existing_cache_extent(struct cache_tree *tree,
                                 struct cache_extent *pe)
 {
        struct rb_node *found;
+       struct cache_extent *entry;
 
        found = tree_insert(&tree->root, pe->start, pe->size, &pe->rb_node);
-       if (found)
+       if (found) {
+               entry = rb_entry(found, struct cache_extent, rb_node);
                return -EEXIST;
+       }
        return 0;
 }
 
index c907fd8a0ce0817ab8b1b2da7a2a1f77b15d746f..2a47827d184d95d463ad2d3b607c3a115c853e9a 100644 (file)
 #include "print-tree.h"
 #include "transaction.h"
 #include "crc32c.h"
+#include "volumes.h"
 
-#define BLOCK_GROUP_DATA EXTENT_WRITEBACK
+#define BLOCK_GROUP_DATA     EXTENT_WRITEBACK
 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
+#define BLOCK_GROUP_SYSTEM   EXTENT_NEW
+
 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
 
 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
                                 btrfs_root *extent_root);
 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
                               btrfs_root *extent_root);
-static int find_previous_extent(struct btrfs_root *root,
-                               struct btrfs_path *path)
-{
-       struct btrfs_key found_key;
-       struct extent_buffer *leaf;
-       int ret;
 
-       while(1) {
-               if (path->slots[0] == 0) {
-                       ret = btrfs_prev_leaf(root, path);
-                       if (ret != 0)
-                               return ret;
-               } else {
-                       path->slots[0]--;
-               }
-               leaf = path->nodes[0];
-               btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-               if (found_key.type == BTRFS_EXTENT_ITEM_KEY)
-                       return 0;
-       }
-       return 1;
-}
 static int cache_block_group(struct btrfs_root *root,
                             struct btrfs_block_group_cache *block_group)
 {
@@ -91,7 +73,7 @@ static int cache_block_group(struct btrfs_root *root,
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                return ret;
-       ret = find_previous_extent(root, path);
+       ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
        if (ret < 0)
                return ret;
        if (ret == 0) {
@@ -168,7 +150,8 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
        block_group_cache = &info->block_group_cache;
        ret = find_first_extent_bit(block_group_cache,
                                    bytenr, &start, &end,
-                                   BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA);
+                                   BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
+                                   BLOCK_GROUP_SYSTEM);
        if (ret) {
                return NULL;
        }
@@ -182,9 +165,24 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
                return block_group;
        return NULL;
 }
-static u64 noinline find_search_start(struct btrfs_root *root,
+
+/*
+ * Report whether a block group matches any of the requested allocation
+ * classes.  'bits' is a mask of BLOCK_GROUP_DATA, BLOCK_GROUP_METADATA and
+ * BLOCK_GROUP_SYSTEM; each is paired with the matching BTRFS_BLOCK_GROUP_*
+ * bit in cache->flags.  Returns 1 as soon as one pair is set on both
+ * sides, 0 when none is.
+ */
+static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
+{
+       return ((bits & BLOCK_GROUP_DATA) &&
+               (cache->flags & BTRFS_BLOCK_GROUP_DATA)) ||
+              ((bits & BLOCK_GROUP_METADATA) &&
+               (cache->flags & BTRFS_BLOCK_GROUP_METADATA)) ||
+              ((bits & BLOCK_GROUP_SYSTEM) &&
+               (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM));
+}
+
+static int noinline find_search_start(struct btrfs_root *root,
                              struct btrfs_block_group_cache **cache_ret,
-                             u64 search_start, int num, int data)
+                             u64 *start_ret, int num, int data)
 {
        int ret;
        struct btrfs_block_group_cache *cache = *cache_ret;
@@ -192,6 +190,7 @@ static u64 noinline find_search_start(struct btrfs_root *root,
        u64 start = 0;
        u64 end = 0;
        u64 cache_miss = 0;
+       u64 search_start = *start_ret;
        int wrapped = 0;
 
        if (!cache) {
@@ -203,6 +202,9 @@ again:
                goto out;
 
        last = max(search_start, cache->key.objectid);
+       if (!block_group_bits(cache, data)) {
+               goto new_group;
+       }
 
        while(1) {
                ret = find_first_extent_bit(&root->fs_info->free_space_cache,
@@ -220,20 +222,18 @@ again:
                                cache_miss = start;
                        continue;
                }
-               if (data != BTRFS_BLOCK_GROUP_MIXED &&
-                   start + num > cache->key.objectid + cache->key.offset)
+               if (start + num > cache->key.objectid + cache->key.offset)
                        goto new_group;
-               return start;
+               *start_ret = start;
+               return 0;
        }
 out:
        cache = btrfs_lookup_block_group(root->fs_info, search_start);
        if (!cache) {
-               printk("Unable to find block group for %Lu\n",
-                      search_start);
+               printk("Unable to find block group for %Lu\n", search_start);
                WARN_ON(1);
-               return search_start;
        }
-       return search_start;
+       return -ENOSPC;
 
 new_group:
        last = cache->key.objectid + cache->key.offset;
@@ -244,7 +244,6 @@ no_cache:
                if (!wrapped) {
                        wrapped = 1;
                        last = search_start;
-                       data = BTRFS_BLOCK_GROUP_MIXED;
                        goto wrapped;
                }
                goto out;
@@ -291,26 +290,18 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
        int ret;
        int full_search = 0;
        int factor = 8;
-       int data_swap = 0;
 
        block_group_cache = &info->block_group_cache;
 
        if (!owner)
                factor = 8;
 
-       if (data == BTRFS_BLOCK_GROUP_MIXED) {
-               bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
-               factor = 10;
-       } else if (data)
-               bit = BLOCK_GROUP_DATA;
-       else
-               bit = BLOCK_GROUP_METADATA;
+       bit = data;
 
        if (search_start) {
                struct btrfs_block_group_cache *shint;
                shint = btrfs_lookup_block_group(info, search_start);
-               if (shint && (shint->data == data ||
-                             shint->data == BTRFS_BLOCK_GROUP_MIXED)) {
+               if (shint && block_group_bits(shint, data)) {
                        used = btrfs_block_group_used(&shint->item);
                        if (used + shint->pinned <
                            div_factor(shint->key.offset, factor)) {
@@ -318,8 +309,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
                        }
                }
        }
-       if (hint && (hint->data == data ||
-                    hint->data == BTRFS_BLOCK_GROUP_MIXED)) {
+       if (hint && block_group_bits(hint, data)) {
                used = btrfs_block_group_used(&hint->item);
                if (used + hint->pinned <
                    div_factor(hint->key.offset, factor)) {
@@ -365,12 +355,6 @@ again:
                full_search = 1;
                goto again;
        }
-       if (!data_swap) {
-               data_swap = 1;
-               bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
-               last = search_start;
-               goto again;
-       }
 found:
        return found_group;
 }
@@ -985,7 +969,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 static int update_block_group(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              u64 bytenr, u64 num_bytes, int alloc,
-                             int mark_free, int data)
+                             int mark_free)
 {
        struct btrfs_block_group_cache *cache;
        struct btrfs_fs_info *info = root->fs_info;
@@ -1010,41 +994,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                old_val = btrfs_block_group_used(&cache->item);
                num_bytes = min(total, cache->key.offset - byte_in_group);
                if (alloc) {
-                       if (cache->data != data &&
-                           old_val < (cache->key.offset >> 1)) {
-                               int bit_to_clear;
-                               int bit_to_set;
-                               cache->data = data;
-                               if (data) {
-                                       bit_to_clear = BLOCK_GROUP_METADATA;
-                                       bit_to_set = BLOCK_GROUP_DATA;
-                                       cache->item.flags &=
-                                               ~BTRFS_BLOCK_GROUP_MIXED;
-                                       cache->item.flags |=
-                                               BTRFS_BLOCK_GROUP_DATA;
-                               } else {
-                                       bit_to_clear = BLOCK_GROUP_DATA;
-                                       bit_to_set = BLOCK_GROUP_METADATA;
-                                       cache->item.flags &=
-                                               ~BTRFS_BLOCK_GROUP_MIXED;
-                                       cache->item.flags &=
-                                               ~BTRFS_BLOCK_GROUP_DATA;
-                               }
-                               clear_extent_bits(&info->block_group_cache,
-                                                 start, end, bit_to_clear,
-                                                 GFP_NOFS);
-                               set_extent_bits(&info->block_group_cache,
-                                               start, end, bit_to_set,
-                                               GFP_NOFS);
-                       } else if (cache->data != data &&
-                                  cache->data != BTRFS_BLOCK_GROUP_MIXED) {
-                               cache->data = BTRFS_BLOCK_GROUP_MIXED;
-                               set_extent_bits(&info->block_group_cache,
-                                               start, end,
-                                               BLOCK_GROUP_DATA |
-                                               BLOCK_GROUP_METADATA,
-                                               GFP_NOFS);
-                       }
                        old_val += num_bytes;
                } else {
                        old_val -= num_bytes;
@@ -1341,7 +1290,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
                        ops->free_extent(root, bytenr, num_bytes);
 
                ret = update_block_group(trans, root, bytenr, num_bytes, 0,
-                                        mark_free, 0);
+                                        mark_free);
                BUG_ON(ret);
        }
        btrfs_free_path(path);
@@ -1434,36 +1383,21 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
                                     u64 exclude_start, u64 exclude_nr,
                                     int data)
 {
-       struct btrfs_path *path;
-       struct btrfs_key key;
-       u64 hole_size = 0;
-       u64 aligned;
        int ret;
-       int slot = 0;
-       u64 last_byte = 0;
        u64 orig_search_start = search_start;
-       int start_found;
-       struct extent_buffer *l;
        struct btrfs_root * root = orig_root->fs_info->extent_root;
        struct btrfs_fs_info *info = root->fs_info;
        u64 total_needed = num_bytes;
-       int level;
        struct btrfs_block_group_cache *block_group;
        int full_scan = 0;
        int wrapped = 0;
-       u64 cached_start;
 
        WARN_ON(num_bytes < root->sectorsize);
        btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
 
-       level = btrfs_header_level(root->node);
-
-       if (num_bytes >= 32 * 1024 * 1024 && hint_byte) {
-               data = BTRFS_BLOCK_GROUP_MIXED;
-       }
-
        if (search_end == (u64)-1)
                search_end = btrfs_super_total_bytes(&info->super_copy);
+
        if (hint_byte) {
                block_group = btrfs_lookup_block_group(info, hint_byte);
                if (!block_group)
@@ -1477,7 +1411,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
        }
 
        total_needed += empty_size;
-       path = btrfs_alloc_path();
+
 check_failed:
        if (!block_group) {
                block_group = btrfs_lookup_block_group(info, search_start);
@@ -1485,135 +1419,49 @@ check_failed:
                        block_group = btrfs_lookup_block_group(info,
                                                       orig_search_start);
        }
-       search_start = find_search_start(root, &block_group, search_start,
-                                        total_needed, data);
-       search_start = stripe_align(root, search_start);
-       cached_start = search_start;
-       btrfs_init_path(path);
-       ins->objectid = search_start;
-       ins->offset = 0;
-       start_found = 0;
-       path->reada = 2;
-
-       ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
-       if (ret < 0)
-               goto error;
-       ret = find_previous_extent(root, path);
-       if (ret < 0)
+       ret = find_search_start(root, &block_group, &search_start,
+                               total_needed, data);
+       if (ret)
                goto error;
-       l = path->nodes[0];
-       btrfs_item_key_to_cpu(l, &key, path->slots[0]);
-       while (1) {
-               l = path->nodes[0];
-               slot = path->slots[0];
-               if (slot >= btrfs_header_nritems(l)) {
-                       ret = btrfs_next_leaf(root, path);
-                       if (ret == 0)
-                               continue;
-                       if (ret < 0)
-                               goto error;
-
-                       search_start = max(search_start,
-                                          block_group->key.objectid);
-                       if (!start_found) {
-                               aligned = stripe_align(root, search_start);
-                               ins->objectid = aligned;
-                               if (aligned >= search_end) {
-                                       ret = -ENOSPC;
-                                       goto error;
-                               }
-                               ins->offset = search_end - aligned;
-                               start_found = 1;
-                               goto check_pending;
-                       }
-                       ins->objectid = stripe_align(root,
-                                                    last_byte > search_start ?
-                                                    last_byte : search_start);
-                       if (search_end <= ins->objectid) {
-                               ret = -ENOSPC;
-                               goto error;
-                       }
-                       ins->offset = search_end - ins->objectid;
-                       BUG_ON(ins->objectid >= search_end);
-                       goto check_pending;
-               }
-               btrfs_item_key_to_cpu(l, &key, slot);
-
-               if (key.objectid >= search_start && key.objectid > last_byte &&
-                   start_found) {
-                       if (last_byte < search_start)
-                               last_byte = search_start;
-                       aligned = stripe_align(root, last_byte);
-                       hole_size = key.objectid - aligned;
-                       if (key.objectid > aligned && hole_size >= num_bytes) {
-                               ins->objectid = aligned;
-                               ins->offset = hole_size;
-                               goto check_pending;
-                       }
-               }
-               if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) {
-                       if (!start_found && btrfs_key_type(&key) ==
-                           BTRFS_BLOCK_GROUP_ITEM_KEY) {
-                               last_byte = key.objectid;
-                               start_found = 1;
-                       }
-                       goto next;
-               }
 
-
-               start_found = 1;
-               last_byte = key.objectid + key.offset;
-
-               if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
-                   last_byte >= block_group->key.objectid +
-                   block_group->key.offset) {
-                       btrfs_release_path(root, path);
-                       search_start = block_group->key.objectid +
-                               block_group->key.offset;
-                       goto new_group;
-               }
-next:
-               path->slots[0]++;
-               cond_resched();
-       }
-check_pending:
-       /* we have to make sure we didn't find an extent that has already
-        * been allocated by the map tree or the original allocation
-        */
-       btrfs_release_path(root, path);
-       BUG_ON(ins->objectid < search_start);
+       search_start = stripe_align(root, search_start);
+       ins->objectid = search_start;
+       ins->offset = num_bytes;
 
        if (ins->objectid + num_bytes >= search_end)
                goto enospc;
-       if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
-           ins->objectid + num_bytes > block_group->
-           key.objectid + block_group->key.offset) {
+
+       if (ins->objectid + num_bytes >
+           block_group->key.objectid + block_group->key.offset) {
                search_start = block_group->key.objectid +
                        block_group->key.offset;
                goto new_group;
        }
+
        if (test_range_bit(&info->extent_ins, ins->objectid,
                           ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
                search_start = ins->objectid + num_bytes;
                goto new_group;
        }
+
        if (test_range_bit(&info->pinned_extents, ins->objectid,
                           ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
                search_start = ins->objectid + num_bytes;
                goto new_group;
        }
+
        if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
            ins->objectid < exclude_start + exclude_nr)) {
                search_start = exclude_start + exclude_nr;
                goto new_group;
        }
-       if (!data) {
+
+       if (!(data & BLOCK_GROUP_DATA)) {
                block_group = btrfs_lookup_block_group(info, ins->objectid);
                if (block_group)
                        trans->block_group = block_group;
        }
        ins->offset = num_bytes;
-       btrfs_free_path(path);
        return 0;
 
 new_group:
@@ -1628,7 +1476,6 @@ enospc:
                        if (!full_scan)
                                total_needed -= empty_size;
                        full_scan = 1;
-                       data = BTRFS_BLOCK_GROUP_MIXED;
                } else
                        wrapped = 1;
        }
@@ -1639,8 +1486,6 @@ enospc:
        goto check_failed;
 
 error:
-       btrfs_release_path(root, path);
-       btrfs_free_path(path);
        return ret;
 }
 /*
@@ -1670,6 +1515,13 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
        struct btrfs_extent_ref *ref;
        struct btrfs_key keys[2];
 
+       if (data)
+               data = BLOCK_GROUP_DATA;
+       else if (info->force_system_allocs || root == root->fs_info->chunk_root)
+               data = BLOCK_GROUP_SYSTEM;
+       else
+               data = BLOCK_GROUP_METADATA;
+
        WARN_ON(num_bytes < root->sectorsize);
        if (ops && ops->alloc_extent) {
                ret = ops->alloc_extent(root, num_bytes, hint_byte, ins);
@@ -1699,7 +1551,6 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
                set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
                                ins->objectid + ins->offset - 1,
                                EXTENT_LOCKED, GFP_NOFS);
-               WARN_ON(data == 1);
                goto update_block;
        }
 
@@ -1749,8 +1600,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
        }
 
 update_block:
-       ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
-                                data);
+       ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
        if (ret) {
                printk("update block group failed for %Lu %Lu\n",
                       ins->objectid, ins->offset);
@@ -1812,6 +1662,7 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
                btrfs_free_extent(trans, root, ins.objectid, blocksize,
                                  root->root_key.objectid, ref_generation,
                                  0, 0, 0);
+               BUG_ON(1);
                return ERR_PTR(-ENOMEM);
        }
        btrfs_set_buffer_uptodate(buf);
@@ -2179,11 +2030,44 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
        return 0;
 }
 
+/*
+ * Position 'path' on the first BTRFS_BLOCK_GROUP_ITEM_KEY item whose
+ * objectid is at least key->objectid.
+ *
+ * The walk starts wherever btrfs_search_slot() lands for 'key' and moves
+ * forward one slot at a time, stepping to the following leaf through
+ * btrfs_next_leaf() whenever the current one is exhausted.
+ *
+ * Returns 0 with path->nodes[0] / path->slots[0] naming the matching item,
+ * a negative value passed through from btrfs_search_slot() or
+ * btrfs_next_leaf(), or -ENOENT when btrfs_next_leaf() returns a positive
+ * value (presumably meaning there is no further leaf to visit).
+ */
+int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
+                          struct btrfs_key *key)
+{
+       struct btrfs_key cur;
+       struct extent_buffer *eb;
+       int ret;
+
+       ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
+       if (ret < 0)
+               return ret;
+
+       for (;;) {
+               eb = path->nodes[0];
+               if (path->slots[0] < btrfs_header_nritems(eb)) {
+                       btrfs_item_key_to_cpu(eb, &cur, path->slots[0]);
+                       if (cur.type == BTRFS_BLOCK_GROUP_ITEM_KEY &&
+                           cur.objectid >= key->objectid)
+                               return 0;
+                       path->slots[0]++;
+                       continue;
+               }
+
+               /* slot is past the last item of this leaf: try the next one */
+               ret = btrfs_next_leaf(root, path);
+               if (ret < 0)
+                       return ret;
+               if (ret > 0)
+                       return -ENOENT;
+       }
+}
+
 int btrfs_read_block_groups(struct btrfs_root *root)
 {
        struct btrfs_path *path;
        int ret;
-       int err = 0;
        int bit;
        struct btrfs_block_group_cache *cache;
        struct btrfs_fs_info *info = root->fs_info;
@@ -2196,25 +2080,26 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 
        root = info->extent_root;
        key.objectid = 0;
-       key.offset = BTRFS_BLOCK_GROUP_SIZE;
+       key.offset = 0;
        btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
-
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
 
        while(1) {
-               ret = btrfs_search_slot(NULL, info->extent_root,
-                                       &key, path, 0, 0);
+               ret = find_first_block_group(root, path, &key);
+               if (ret > 0) {
+                       ret = 0;
+                       goto error;
+               }
                if (ret != 0) {
-                       err = ret;
-                       break;
+                       goto error;
                }
                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
                cache = kmalloc(sizeof(*cache), GFP_NOFS);
                if (!cache) {
-                       err = -1;
+                       ret = -ENOMEM;
                        break;
                }
 
@@ -2226,16 +2111,14 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                cache->pinned = 0;
                key.objectid = found_key.objectid + found_key.offset;
                btrfs_release_path(root, path);
-
-               if (cache->item.flags & BTRFS_BLOCK_GROUP_MIXED) {
-                       bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
-                       cache->data = BTRFS_BLOCK_GROUP_MIXED;
-               } else if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
+               cache->flags = btrfs_block_group_flags(&cache->item);
+               bit = 0;
+               if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
                        bit = BLOCK_GROUP_DATA;
-                       cache->data = BTRFS_BLOCK_GROUP_DATA;
-               } else {
+               } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
+                       bit = BLOCK_GROUP_SYSTEM;
+               } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
                        bit = BLOCK_GROUP_METADATA;
-                       cache->data = 0;
                }
 
                /* use EXTENT_LOCKED to prevent merging */
@@ -2249,9 +2132,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                    btrfs_super_total_bytes(&info->super_copy))
                        break;
        }
-
+       ret = 0;
+error:
        btrfs_free_path(path);
-       return 0;
+       return ret;
 }
 
 static int btrfs_insert_block_group(struct btrfs_trans_handle *trans,
@@ -2274,14 +2158,11 @@ static int btrfs_insert_block_group(struct btrfs_trans_handle *trans,
        return 0;
 }
 
-int btrfs_make_block_groups(struct btrfs_trans_handle *trans,
-                           struct btrfs_root *root)
+int btrfs_make_block_group(struct btrfs_trans_handle *trans,
+                          struct btrfs_root *root, u64 bytes_used,
+                          u64 type, u64 chunk_tree, u64 chunk_objectid,
+                          u64 size)
 {
-       u64 group_size;
-       u64 bytes_used;
-       u64 total_bytes;
-       u64 cur_start;
-       u64 nr = 0;
        int ret;
        int bit;
        struct btrfs_root *extent_root;
@@ -2290,46 +2171,34 @@ int btrfs_make_block_groups(struct btrfs_trans_handle *trans,
 
        extent_root = root->fs_info->extent_root;
        block_group_cache = &root->fs_info->block_group_cache;
-       group_size = BTRFS_BLOCK_GROUP_SIZE;
-       bytes_used = btrfs_super_bytes_used(&root->fs_info->super_copy);
-       total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
-
-       cur_start = 0;
-       while (cur_start < total_bytes) {
-               cache = malloc(sizeof(*cache));
-               BUG_ON(!cache);
-               cache->key.objectid = cur_start;
-               cache->key.offset = group_size;
-               btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
-               memset(&cache->item, 0, sizeof(cache->item));
-               if (nr == 0)
-                       btrfs_set_block_group_used(&cache->item, bytes_used);
-               if (nr++ % 3) {
-                       bit = BLOCK_GROUP_DATA;
-                       cache->data = 1;
-                       cache->item.flags |= BTRFS_BLOCK_GROUP_DATA;
-               } else {
-                       bit = BLOCK_GROUP_METADATA;
-                       cache->data = 0;
-               }
 
-               set_extent_bits(block_group_cache, cur_start,
-                               cur_start + group_size - 1,
-                               bit | EXTENT_LOCKED, GFP_NOFS);
-               set_state_private(block_group_cache, cur_start,
-                                 (unsigned long)cache);
-               cur_start += group_size;
-       }
-       /* then insert all the items */
-       cur_start = 0;
-       while(cur_start < total_bytes) {
-               cache = btrfs_lookup_block_group(root->fs_info, cur_start);
-               BUG_ON(!cache);
-               ret = btrfs_insert_block_group(trans, root, &cache->key,
-                                              &cache->item);
-               BUG_ON(ret);
-               cur_start += group_size;
+       cache = malloc(sizeof(*cache));
+       BUG_ON(!cache);
+       cache->key.objectid = chunk_objectid;
+       cache->key.offset = size;
+       cache->cached = 0;
+       btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
+       memset(&cache->item, 0, sizeof(cache->item));
+       btrfs_set_block_group_used(&cache->item, bytes_used);
+       btrfs_set_block_group_chunk_tree(&cache->item, chunk_tree);
+       btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
+       cache->flags = type;
+       btrfs_set_block_group_flags(&cache->item, type);
+
+       if (type & BTRFS_BLOCK_GROUP_DATA) {
+               bit = BLOCK_GROUP_DATA;
+       } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
+               bit = BLOCK_GROUP_SYSTEM;
+       } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
+               bit = BLOCK_GROUP_METADATA;
        }
+       set_extent_bits(block_group_cache, chunk_objectid,
+                       chunk_objectid + size - 1,
+                       bit | EXTENT_LOCKED, GFP_NOFS);
+       set_state_private(block_group_cache, chunk_objectid,
+                         (unsigned long)cache);
+       ret = btrfs_insert_block_group(trans, root, &cache->key, &cache->item);
+       BUG_ON(ret);
        return 0;
 }
 
@@ -2343,8 +2212,8 @@ u64 btrfs_hash_extent_ref(u64 root_objectid, u64 ref_generation,
 int btrfs_update_block_group(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root,
                             u64 bytenr, u64 num_bytes, int alloc,
-                            int mark_free, int data)
+                            int mark_free)
 {
        return update_block_group(trans, root, bytenr, num_bytes,
-                                 alloc, mark_free, data);
+                                 alloc, mark_free);
 }
index 62f3568061dc7d2844f6ec3498e650d335a098c2..b663275f2f96efb4fdcd2c08ff014d56d38ef066 100644 (file)
@@ -552,8 +552,10 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
        int ret;
 
        eb = malloc(sizeof(struct extent_buffer) + blocksize);
-       if (!eb)
+       if (!eb) {
+               BUG();
                return NULL;
+       }
 
        eb->start = bytenr;
        eb->len = blocksize;
diff --git a/mkfs.c b/mkfs.c
index c3a84c03b0b4bccea017cbd9e83b53fd7555500d..d5cefac8ccd4135cbb80ae548800781f06931e42 100644 (file)
--- a/mkfs.c
+++ b/mkfs.c
@@ -33,6 +33,7 @@
 #include "kerncompat.h"
 #include "ctree.h"
 #include "disk-io.h"
+#include "volumes.h"
 #include "transaction.h"
 #include "utils.h"
 
@@ -108,6 +109,9 @@ static int make_root_dir(int fd) {
        struct btrfs_root *root;
        struct btrfs_trans_handle *trans;
        struct btrfs_key location;
+       u64 bytes_used;
+       u64 chunk_start = 0;
+       u64 chunk_size = 0;
        int ret;
 
        root = open_ctree_fd(fd, 0);
@@ -117,7 +121,40 @@ static int make_root_dir(int fd) {
                return -1;
        }
        trans = btrfs_start_transaction(root, 1);
-       ret = btrfs_make_block_groups(trans, root);
+       bytes_used = btrfs_super_bytes_used(&root->fs_info->super_copy);
+
+       root->fs_info->force_system_allocs = 1;
+       ret = btrfs_make_block_group(trans, root, bytes_used,
+                                    BTRFS_BLOCK_GROUP_SYSTEM,
+                                    BTRFS_CHUNK_TREE_OBJECTID,
+                                    0, BTRFS_MKFS_SYSTEM_GROUP_SIZE);
+       BUG_ON(ret);
+       ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
+                               &chunk_start, &chunk_size,
+                               BTRFS_BLOCK_GROUP_METADATA);
+       BUG_ON(ret);
+       ret = btrfs_make_block_group(trans, root, 0,
+                                    BTRFS_BLOCK_GROUP_METADATA,
+                                    BTRFS_CHUNK_TREE_OBJECTID,
+                                    chunk_start, chunk_size);
+       BUG_ON(ret);
+
+       root->fs_info->force_system_allocs = 0;
+       btrfs_commit_transaction(trans, root);
+       trans = btrfs_start_transaction(root, 1);
+       BUG_ON(!trans);
+
+       ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
+                               &chunk_start, &chunk_size,
+                               BTRFS_BLOCK_GROUP_DATA);
+       BUG_ON(ret);
+       ret = btrfs_make_block_group(trans, root, 0,
+                                    BTRFS_BLOCK_GROUP_DATA,
+                                    BTRFS_CHUNK_TREE_OBJECTID,
+                                    chunk_start, chunk_size);
+       BUG_ON(ret);
+
+       // ret = btrfs_make_block_group(trans, root, 0, 1);
        ret = btrfs_make_root_dir(trans, root->fs_info->tree_root,
                              BTRFS_ROOT_TREE_DIR_OBJECTID);
        if (ret)
@@ -179,7 +216,7 @@ int main(int ac, char **av)
        u32 sectorsize = 4096;
        u32 nodesize = 16 * 1024;
        u32 stripesize = 4096;
-       u64 blocks[4];
+       u64 blocks[6];
        int zero_end = 0;
 
        while(1) {
@@ -261,10 +298,10 @@ int main(int ac, char **av)
                }
        }
 
-       for (i = 0; i < 4; i++)
+       for (i = 0; i < 6; i++)
                blocks[i] = BTRFS_SUPER_INFO_OFFSET + leafsize * i;
 
-       ret = make_btrfs(fd, blocks, block_count, nodesize, leafsize,
+       ret = make_btrfs(fd, file, blocks, block_count, nodesize, leafsize,
                         sectorsize, stripesize);
        if (ret) {
                fprintf(stderr, "error during mkfs %d\n", ret);
index db4633954cb9d5f80cd904e08a02c2ad49306457..ec99b8cad492837d49ef615b0e762fd443d976e2 100644 (file)
@@ -75,6 +75,40 @@ static int print_inode_ref_item(struct extent_buffer *eb, struct btrfs_item *ite
        return 0;
 }
 
+/*
+ * print a chunk item: one summary line with owner, type and stripe
+ * count, followed by one line per stripe with its devid and offset
+ */
+static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk)
+{
+       unsigned long long owner;
+       unsigned long long type;
+       int stripe_count;
+       int nr = 0;
+
+       stripe_count = btrfs_chunk_num_stripes(eb, chunk);
+       owner = btrfs_chunk_owner(eb, chunk);
+       type = btrfs_chunk_type(eb, chunk);
+       printf("\t\tchunk owner %llu type %llu num_stripes %d\n",
+              owner, type, stripe_count);
+
+       while (nr < stripe_count) {
+               unsigned long long devid;
+               unsigned long long offset;
+
+               devid = btrfs_stripe_devid_nr(eb, chunk, nr);
+               offset = btrfs_stripe_offset_nr(eb, chunk, nr);
+               printf("\t\t\tstripe %d devid %llu offset %llu\n", nr,
+                      devid, offset);
+               nr++;
+       }
+}
+/*
+ * print a dev item: its name, devid, total bytes and bytes used.
+ * The name is copied out of the extent buffer into a temporary
+ * allocation; if that allocation fails an empty name is printed
+ * instead of handing printf a NULL string.
+ */
+static void print_dev_item(struct extent_buffer *eb,
+                          struct btrfs_dev_item *dev_item)
+{
+       char *name;
+       int name_len;
+
+       name_len = btrfs_device_name_len(eb, dev_item);
+       name = kmalloc(name_len, GFP_NOFS);
+       if (name) {
+               read_extent_buffer(eb, name,
+                                  (unsigned long)btrfs_device_name(dev_item),
+                                  name_len);
+       } else {
+               /* nothing was copied, so nothing may be printed */
+               name_len = 0;
+       }
+       /* %llu matches the unsigned long long casts, %Lu is non-standard */
+       printf("\t\tdev item name %.*s devid %llu "
+              "total_bytes %llu bytes used %llu\n", name_len,
+              name ? name : "",
+              (unsigned long long)btrfs_device_id(eb, dev_item),
+              (unsigned long long)btrfs_device_total_bytes(eb, dev_item),
+              (unsigned long long)btrfs_device_bytes_used(eb, dev_item));
+       kfree(name);
+}
 void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
 {
        int i;
@@ -89,6 +123,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
        struct btrfs_block_group_item *bi;
        struct btrfs_extent_ref *ref;
        struct btrfs_inode_ref *iref;
+       struct btrfs_dev_extent *dev_extent;
        struct btrfs_disk_key disk_key;
        struct btrfs_root_item root_item;
        struct btrfs_block_group_item bg_item;
@@ -190,9 +225,23 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
                                            struct btrfs_block_group_item);
                        read_extent_buffer(l, &bg_item, (unsigned long)bi,
                                           sizeof(bg_item));
-                       printf("\t\tblock group used %llu flags %x\n",
+                       printf("\t\tblock group used %llu flags %llx\n",
                               (unsigned long long)btrfs_block_group_used(&bg_item),
-                              bg_item.flags);
+                              (unsigned long long)btrfs_block_group_flags(&bg_item));
+                       break;
+               case BTRFS_CHUNK_ITEM_KEY:
+                       print_chunk(l, btrfs_item_ptr(l, i, struct btrfs_chunk));
+                       break;
+               case BTRFS_DEV_ITEM_KEY:
+                       print_dev_item(l, btrfs_item_ptr(l, i,
+                                       struct btrfs_dev_item));
+                       break;
+               case BTRFS_DEV_EXTENT_KEY:
+                       dev_extent = btrfs_item_ptr(l, i,
+                                                   struct btrfs_dev_extent);
+                       printf("\t\tdev extent owner %llu length %llu\n",
+                              (unsigned long long)btrfs_dev_extent_owner(l, dev_extent),
+                              (unsigned long long)btrfs_dev_extent_length(l, dev_extent));
                        break;
                case BTRFS_STRING_ITEM_KEY:
                        /* dirty, but it's simple */
diff --git a/utils.c b/utils.c
index 3a618ac85562ffa18385d22a06314cb6b070da9d..f04e9c5ac5ed86722dd31ae10bcc6d43a750e5e7 100644 (file)
--- a/utils.c
+++ b/utils.c
 #include "transaction.h"
 #include "crc32c.h"
 #include "utils.h"
-static u64 reference_root_table[4] = {
+static u64 reference_root_table[6] = {
        [1] =   BTRFS_ROOT_TREE_OBJECTID,
        [2] =   BTRFS_EXTENT_TREE_OBJECTID,
-       [3] =   BTRFS_FS_TREE_OBJECTID,
+       [3] =   BTRFS_CHUNK_TREE_OBJECTID,
+       [4] =   BTRFS_DEV_TREE_OBJECTID,
+       [5] =   BTRFS_FS_TREE_OBJECTID,
 };
 
-int make_btrfs(int fd, u64 blocks[4], u64 num_bytes, u32 nodesize,
+int make_btrfs(int fd, char *device_name,
+              u64 blocks[6], u64 num_bytes, u32 nodesize,
               u32 leafsize, u32 sectorsize, u32 stripesize)
 {
        struct btrfs_super_block super;
@@ -48,6 +51,10 @@ int make_btrfs(int fd, u64 blocks[4], u64 num_bytes, u32 nodesize,
        struct btrfs_extent_ref *extent_ref;
        struct btrfs_extent_item *extent_item;
        struct btrfs_inode_item *inode_item;
+       struct btrfs_chunk *chunk;
+       struct btrfs_dev_item *dev_item;
+       struct btrfs_dev_extent *dev_extent;
+       u8 *ptr;
        int i;
        int ret;
        u32 itemoff;
@@ -56,6 +63,8 @@ int make_btrfs(int fd, u64 blocks[4], u64 num_bytes, u32 nodesize,
        u64 first_free;
        u64 ref_gen;
        u64 ref_root;
+       u32 array_size;
+       u32 item_size;
 
        first_free = BTRFS_SUPER_INFO_OFFSET + sectorsize * 2 - 1;
        first_free &= ~((u64)sectorsize - 1);
@@ -66,27 +75,24 @@ int make_btrfs(int fd, u64 blocks[4], u64 num_bytes, u32 nodesize,
        strncpy((char *)&super.magic, BTRFS_MAGIC, sizeof(super.magic));
        btrfs_set_super_generation(&super, 1);
        btrfs_set_super_root(&super, blocks[1]);
+       btrfs_set_super_chunk_root(&super, blocks[3]);
        btrfs_set_super_total_bytes(&super, num_bytes);
-       btrfs_set_super_bytes_used(&super, first_free + 3 * leafsize);
+       btrfs_set_super_bytes_used(&super, first_free + 5 * leafsize);
        btrfs_set_super_root_dir(&super, 0);
        btrfs_set_super_sectorsize(&super, sectorsize);
        btrfs_set_super_leafsize(&super, leafsize);
        btrfs_set_super_nodesize(&super, nodesize);
        btrfs_set_super_stripesize(&super, stripesize);
        btrfs_set_super_root_level(&super, 0);
+       btrfs_set_super_chunk_root_level(&super, 0);
+       btrfs_set_super_sys_array_size(&super, 0);
 
        buf = malloc(sizeof(*buf) + max(sectorsize, leafsize));
 
-       BUG_ON(sizeof(super) > sectorsize);
-       memset(buf->data, 0, sectorsize);
-       memcpy(buf->data, &super, sizeof(super));
-       ret = pwrite(fd, buf->data, sectorsize, blocks[0]);
-       BUG_ON(ret != sectorsize);
-
        /* create the tree of root objects */
        memset(buf->data, 0, leafsize);
        btrfs_set_header_bytenr(buf, blocks[1]);
-       btrfs_set_header_nritems(buf, 2);
+       btrfs_set_header_nritems(buf, 3);
        btrfs_set_header_generation(buf, 1);
        btrfs_set_header_owner(buf, BTRFS_ROOT_TREE_OBJECTID);
        write_extent_buffer(buf, super.fsid, (unsigned long)
@@ -117,13 +123,23 @@ int make_btrfs(int fd, u64 blocks[4], u64 num_bytes, u32 nodesize,
                            sizeof(root_item));
 
        itemoff = itemoff - sizeof(root_item);
-       btrfs_set_root_bytenr(&root_item, blocks[3]);
+       btrfs_set_root_bytenr(&root_item, blocks[5]);
        btrfs_set_disk_key_objectid(&disk_key, BTRFS_FS_TREE_OBJECTID);
        btrfs_set_item_key(buf, &disk_key, 1);
        btrfs_set_item_offset(buf, btrfs_item_nr(buf, 1), itemoff);
        btrfs_set_item_size(buf, btrfs_item_nr(buf, 1), sizeof(root_item));
        write_extent_buffer(buf, &root_item, btrfs_item_ptr_offset(buf, 1),
                            sizeof(root_item));
+
+       itemoff = itemoff - sizeof(root_item);
+       btrfs_set_root_bytenr(&root_item, blocks[4]);
+       btrfs_set_disk_key_objectid(&disk_key, BTRFS_DEV_TREE_OBJECTID);
+       btrfs_set_item_key(buf, &disk_key, 2);
+       btrfs_set_item_offset(buf, btrfs_item_nr(buf, 2), itemoff);
+       btrfs_set_item_size(buf, btrfs_item_nr(buf, 2), sizeof(root_item));
+       write_extent_buffer(buf, &root_item, btrfs_item_ptr_offset(buf, 2),
+                           sizeof(root_item));
+
        ret = pwrite(fd, buf->data, leafsize, blocks[1]);
        BUG_ON(ret != leafsize);
 
@@ -140,7 +156,7 @@ int make_btrfs(int fd, u64 blocks[4], u64 num_bytes, u32 nodesize,
        extent_item = btrfs_item_ptr(buf, nritems, struct btrfs_extent_item);
        btrfs_set_extent_refs(buf, extent_item, 1);
        nritems++;
-       for (i = 1; i < 4; i++) {
+       for (i = 1; i < 6; i++) {
                BUG_ON(blocks[i] < first_free);
                BUG_ON(blocks[i] < blocks[i - 1]);
 
@@ -190,13 +206,127 @@ int make_btrfs(int fd, u64 blocks[4], u64 num_bytes, u32 nodesize,
        ret = pwrite(fd, buf->data, leafsize, blocks[2]);
        BUG_ON(ret != leafsize);
 
-       /* finally create the FS root */
+       /* create the chunk tree */
+       nritems = 0;
+       item_size = btrfs_chunk_item_size(1);
+       itemoff = __BTRFS_LEAF_DATA_SIZE(leafsize) - item_size;
+
+       /* first we have chunk 0 */
+       btrfs_set_disk_key_objectid(&disk_key, 0);
+       btrfs_set_disk_key_offset(&disk_key, BTRFS_MKFS_SYSTEM_GROUP_SIZE);
+       btrfs_set_disk_key_type(&disk_key, BTRFS_CHUNK_ITEM_KEY);
+       btrfs_set_item_key(buf, &disk_key, nritems);
+       btrfs_set_item_offset(buf, btrfs_item_nr(buf, nritems), itemoff);
+       btrfs_set_item_size(buf, btrfs_item_nr(buf,  nritems), item_size);
+
+       chunk = btrfs_item_ptr(buf, nritems, struct btrfs_chunk);
+       btrfs_set_chunk_owner(buf, chunk, BTRFS_EXTENT_TREE_OBJECTID);
+       btrfs_set_chunk_stripe_len(buf, chunk, 64 * 1024);
+       btrfs_set_chunk_type(buf, chunk, BTRFS_BLOCK_GROUP_SYSTEM);
+       btrfs_set_chunk_io_align(buf, chunk, sectorsize);
+       btrfs_set_chunk_io_width(buf, chunk, sectorsize);
+       btrfs_set_chunk_sector_size(buf, chunk, sectorsize);
+       btrfs_set_chunk_num_stripes(buf, chunk, 1);
+       btrfs_set_stripe_devid_nr(buf, chunk, 0, 1);
+       btrfs_set_stripe_offset_nr(buf, chunk, 0, 0);
+
+       /* copy the key for the chunk to the system array */
+       ptr = super.sys_chunk_array;
+       array_size = sizeof(disk_key);
+
+       memcpy(ptr, &disk_key, sizeof(disk_key));
+       ptr += sizeof(disk_key);
+
+       /* copy the chunk to the system array */
+       read_extent_buffer(buf, ptr, (unsigned long)chunk, item_size);
+       array_size += item_size;
+       ptr += item_size;
+
+       /* then device 1 (there is no device 0) */
+       nritems++;
+       item_size = sizeof(*dev_item) + strlen(device_name);
+       itemoff = itemoff - item_size;
+       btrfs_set_disk_key_objectid(&disk_key, BTRFS_DEV_ITEMS_OBJECTID);
+       btrfs_set_disk_key_offset(&disk_key, 1);
+       btrfs_set_disk_key_type(&disk_key, BTRFS_DEV_ITEM_KEY);
+       btrfs_set_item_key(buf, &disk_key, nritems);
+       btrfs_set_item_offset(buf, btrfs_item_nr(buf, nritems), itemoff);
+       btrfs_set_item_size(buf, btrfs_item_nr(buf, nritems), item_size);
+
+       dev_item = btrfs_item_ptr(buf, nritems, struct btrfs_dev_item);
+       btrfs_set_device_id(buf, dev_item, 1);
+       btrfs_set_device_total_bytes(buf, dev_item, num_bytes);
+       btrfs_set_device_bytes_used(buf, dev_item,
+                                   BTRFS_MKFS_SYSTEM_GROUP_SIZE);
+       btrfs_set_device_io_align(buf, dev_item, sectorsize);
+       btrfs_set_device_io_width(buf, dev_item, sectorsize);
+       btrfs_set_device_sector_size(buf, dev_item, sectorsize);
+       btrfs_set_device_rdev(buf, dev_item, 0);
+       btrfs_set_device_type(buf, dev_item, 0);
+       btrfs_set_device_partition(buf, dev_item, 0);
+       btrfs_set_device_name_len(buf, dev_item, strlen(device_name));
+       nritems++;
+
+       memset_extent_buffer(buf, 0, (unsigned long)btrfs_device_uuid(dev_item),
+                            BTRFS_DEV_UUID_SIZE);
+       write_extent_buffer(buf, device_name,
+                           (unsigned long)btrfs_device_name(dev_item),
+                           strlen(device_name));
+
+       /* copy the device item and key into the system array */
+       memcpy(ptr, &disk_key, sizeof(disk_key));
+       ptr += sizeof(disk_key);
+       array_size += sizeof(disk_key);
+
+       read_extent_buffer(buf, ptr, (unsigned long)dev_item, item_size);
+       array_size += item_size;
+       ptr += item_size;
+
+       btrfs_set_super_sys_array_size(&super, array_size);
+
        btrfs_set_header_bytenr(buf, blocks[3]);
+       btrfs_set_header_owner(buf, BTRFS_CHUNK_TREE_OBJECTID);
+       btrfs_set_header_nritems(buf, nritems);
+       ret = pwrite(fd, buf->data, leafsize, blocks[3]);
+
+       /* create the device tree */
+       nritems = 0;
+       itemoff = __BTRFS_LEAF_DATA_SIZE(leafsize) -
+               sizeof(struct btrfs_dev_extent);
+
+       btrfs_set_disk_key_objectid(&disk_key, 1);
+       btrfs_set_disk_key_offset(&disk_key, 0);
+       btrfs_set_disk_key_type(&disk_key, BTRFS_DEV_EXTENT_KEY);
+       btrfs_set_item_key(buf, &disk_key, nritems);
+       btrfs_set_item_offset(buf, btrfs_item_nr(buf, nritems), itemoff);
+       btrfs_set_item_size(buf, btrfs_item_nr(buf,  nritems),
+                           sizeof(struct btrfs_dev_extent));
+       dev_extent = btrfs_item_ptr(buf, nritems, struct btrfs_dev_extent);
+       btrfs_set_dev_extent_owner(buf, dev_extent, 0);
+       btrfs_set_dev_extent_length(buf, dev_extent,
+                                   BTRFS_MKFS_SYSTEM_GROUP_SIZE);
+       nritems++;
+
+       btrfs_set_header_bytenr(buf, blocks[4]);
+       btrfs_set_header_owner(buf, BTRFS_DEV_TREE_OBJECTID);
+       btrfs_set_header_nritems(buf, nritems);
+       ret = pwrite(fd, buf->data, leafsize, blocks[4]);
+
+       /* finally create the FS root */
+       btrfs_set_header_bytenr(buf, blocks[5]);
        btrfs_set_header_owner(buf, BTRFS_FS_TREE_OBJECTID);
        btrfs_set_header_nritems(buf, 0);
-       ret = pwrite(fd, buf->data, leafsize, blocks[3]);
+       ret = pwrite(fd, buf->data, leafsize, blocks[5]);
        BUG_ON(ret != leafsize);
 
+       /* and write out the super block */
+       BUG_ON(sizeof(super) > sectorsize);
+       memset(buf->data, 0, sectorsize);
+       memcpy(buf->data, &super, sizeof(super));
+       ret = pwrite(fd, buf->data, sectorsize, blocks[0]);
+       BUG_ON(ret != sectorsize);
+
+
        free(buf);
        return 0;
 }
diff --git a/utils.h b/utils.h
index 48352f2b24512f6da0a78f9978f3a45932ec60d3..9dc8cf86317dc2f71ad596773e340819cd837cbd 100644 (file)
--- a/utils.h
+++ b/utils.h
 
 #ifndef __UTILS__
 #define __UTILS__
-int make_btrfs(int fd, u64 new_blocks[4], u64 num_bytes, u32 nodesize,
-               u32 leafsize, u32 sectorsize, u32 stripesize);
+
+#define BTRFS_MKFS_SYSTEM_GROUP_SIZE (4 * 1024 * 1024)
+
+int make_btrfs(int fd, char *device_name,
+              u64 blocks[6], u64 num_bytes, u32 nodesize,
+              u32 leafsize, u32 sectorsize, u32 stripesize);
 int btrfs_make_root_dir(struct btrfs_trans_handle *trans,
                        struct btrfs_root *root, u64 objectid);
 #endif
diff --git a/volumes.c b/volumes.c
new file mode 100644 (file)
index 0000000..2fb5a20
--- /dev/null
+++ b/volumes.c
@@ -0,0 +1,771 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "print-tree.h"
+#include "volumes.h"
+
+/*
+ * NOTE(review): pairs a cache_extent node (ce) with a device pointer
+ * and a u64 named physical.  The users of this struct are not visible
+ * here; presumably ce describes a logical range and physical is the
+ * matching byte offset on dev -- confirm against the lookup code.
+ */
+struct map_lookup {
+       struct cache_extent ce;
+       struct btrfs_device *dev;
+       u64 physical;
+};
+
+/*
+ * this uses a pretty simple search, the expectation is that it is
+ * called very infrequently and that a given device has a small number
+ * of extents
+ *
+ * walk the dev extent items of 'device' in device->dev_root looking for
+ * a gap of at least num_bytes between consecutive extents, or for the
+ * space after the last extent.  On success the start of the range is
+ * stored in *start.
+ *
+ * returns 0 on success, -ENOSPC when no suitable range lies below
+ * device->total_bytes, or a negative error from the tree search.  The
+ * path is released before returning.
+ */
+static int find_free_dev_extent(struct btrfs_trans_handle *trans,
+                               struct btrfs_device *device,
+                               struct btrfs_path *path,
+                               u64 num_bytes, u64 *start)
+{
+       struct btrfs_key key;
+       struct btrfs_root *root = device->dev_root;
+       struct btrfs_dev_extent *dev_extent = NULL;
+       u64 hole_size = 0;
+       u64 last_byte = 0;
+       u64 search_start = 0;
+       u64 search_end = device->total_bytes;
+       int ret;
+       int slot = 0;
+       int start_found;
+       struct extent_buffer *l;
+
+       start_found = 0;
+       path->reada = 2;
+
+       /* FIXME use last free of some kind */
+
+       key.objectid = device->devid;
+       key.offset = search_start;
+       key.type = BTRFS_DEV_EXTENT_KEY;
+       ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
+       if (ret < 0)
+               goto error;
+       /* only hard errors abort here; a positive return is ignored */
+       ret = btrfs_previous_item(root, path, 0, key.type);
+       if (ret < 0)
+               goto error;
+       /* this key is read again inside the loop before it is used */
+       l = path->nodes[0];
+       btrfs_item_key_to_cpu(l, &key, path->slots[0]);
+       while (1) {
+               l = path->nodes[0];
+               slot = path->slots[0];
+               if (slot >= btrfs_header_nritems(l)) {
+                       /* ran off the end of this leaf, try the next one */
+                       ret = btrfs_next_leaf(root, path);
+                       if (ret == 0)
+                               continue;
+                       if (ret < 0)
+                               goto error;
+                       /* ret > 0: falls through into no_more_items */
+no_more_items:
+                       /* no extent seen for this device: use search_start */
+                       if (!start_found) {
+                               if (search_start >= search_end) {
+                                       ret = -ENOSPC;
+                                       goto error;
+                               }
+                               *start = search_start;
+                               start_found = 1;
+                               goto check_pending;
+                       }
+                       /* otherwise allocate after the last extent seen */
+                       *start = last_byte > search_start ?
+                               last_byte : search_start;
+                       if (search_end <= *start) {
+                               ret = -ENOSPC;
+                               goto error;
+                       }
+                       goto check_pending;
+               }
+               btrfs_item_key_to_cpu(l, &key, slot);
+
+               /* item belongs to a smaller devid, keep walking */
+               if (key.objectid < device->devid)
+                       goto next;
+
+               /* walked past the items of this device */
+               if (key.objectid > device->devid)
+                       goto no_more_items;
+
+               /*
+                * once one extent has been seen, check the gap between the
+                * end of the previous extent and the offset of this item
+                */
+               if (key.offset >= search_start && key.offset > last_byte &&
+                   start_found) {
+                       if (last_byte < search_start)
+                               last_byte = search_start;
+                       hole_size = key.offset - last_byte;
+                       if (key.offset > last_byte &&
+                           hole_size >= num_bytes) {
+                               *start = last_byte;
+                               goto check_pending;
+                       }
+               }
+               /* only dev extent items advance last_byte */
+               if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) {
+                       goto next;
+               }
+
+               start_found = 1;
+               dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
+               /* remember where this extent ends */
+               last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent);
+next:
+               path->slots[0]++;
+               cond_resched();
+       }
+check_pending:
+       /* we have to make sure we didn't find an extent that has already
+        * been allocated by the map tree or the original allocation
+        */
+       btrfs_release_path(root, path);
+       BUG_ON(*start < search_start);
+
+       /*
+        * NOTE(review): '>=' also rejects a range that ends exactly at
+        * search_end -- confirm that is intended
+        */
+       if (*start + num_bytes >= search_end) {
+               ret = -ENOSPC;
+               goto error;
+       }
+       /* check for pending inserts here */
+       return 0;
+
+error:
+       btrfs_release_path(root, path);
+       return ret;
+}
+
+/*
+ * find a free range of num_bytes on the device and record it in the
+ * device's dev_root as a dev extent item with the given owner.  The
+ * start of the range is returned in *start.
+ *
+ * returns 0 on success, -ENOMEM if no path could be allocated, or the
+ * error from find_free_dev_extent.  A failed item insertion is a BUG.
+ */
+int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
+                          struct btrfs_device *device,
+                          u64 owner, u64 num_bytes, u64 *start)
+{
+       struct btrfs_root *dev_root = device->dev_root;
+       struct btrfs_path *path = btrfs_alloc_path();
+       struct btrfs_dev_extent *item;
+       struct extent_buffer *eb;
+       struct btrfs_key location;
+       int err;
+
+       if (!path)
+               return -ENOMEM;
+
+       err = find_free_dev_extent(trans, device, path, num_bytes, start);
+       if (err == 0) {
+               /* the new item is keyed by (devid, DEV_EXTENT, start) */
+               location.objectid = device->devid;
+               location.type = BTRFS_DEV_EXTENT_KEY;
+               location.offset = *start;
+               err = btrfs_insert_empty_item(trans, dev_root, path,
+                                             &location, sizeof(*item));
+               BUG_ON(err);
+
+               eb = path->nodes[0];
+               item = btrfs_item_ptr(eb, path->slots[0],
+                                     struct btrfs_dev_extent);
+               btrfs_set_dev_extent_owner(eb, item, owner);
+               btrfs_set_dev_extent_length(eb, item, num_bytes);
+               btrfs_mark_buffer_dirty(eb);
+       }
+       btrfs_free_path(path);
+       return err;
+}
+
+/*
+ * compute where the next chunk should start: objectid + offset of the
+ * chunk item key that btrfs_previous_item lands on below the largest
+ * possible chunk key, or 0 when no such item is found.
+ *
+ * returns 0 with *objectid set, or a negative error from the search
+ */
+static int find_next_chunk(struct btrfs_root *root, u64 *objectid)
+{
+       struct btrfs_key search;
+       struct btrfs_key last;
+       struct btrfs_path *path = btrfs_alloc_path();
+       int err;
+
+       BUG_ON(!path);
+
+       /* search for the largest possible chunk item key */
+       search.type = BTRFS_CHUNK_ITEM_KEY;
+       search.objectid = (u64)-1;
+       search.offset = (u64)-1;
+
+       err = btrfs_search_slot(NULL, root, &search, path, 0, 0);
+       if (err >= 0) {
+               /* an exact match on the maximum key is treated as a bug */
+               BUG_ON(err == 0);
+
+               if (btrfs_previous_item(root, path, 0,
+                                       BTRFS_CHUNK_ITEM_KEY) == 0) {
+                       btrfs_item_key_to_cpu(path->nodes[0], &last,
+                                             path->slots[0]);
+                       *objectid = last.objectid + last.offset;
+               } else {
+                       *objectid = 0;
+               }
+               err = 0;
+       }
+       btrfs_free_path(path);
+       return err;
+}
+
+/*
+ * return the device whose dev_list entry follows 'last', stepping over
+ * the list head when it is reached.  Returns NULL for an empty list.
+ */
+static struct btrfs_device *next_device(struct list_head *head,
+                                       struct list_head *last)
+{
+       struct list_head *pos = last->next;
+
+       if (list_empty(head))
+               return NULL;
+
+       /* step past the list head */
+       pos = (pos == head) ? pos->next : pos;
+       return list_entry(pos, struct btrfs_device, dev_list);
+}
+
+/*
+ * pick the next devid: one more than the offset of the dev item key
+ * that btrfs_previous_item lands on below the largest possible dev
+ * item key, or 1 when no such item is found.  The path is released
+ * before returning.
+ *
+ * returns 0 with *objectid set, or a negative error from the search
+ */
+static int find_next_devid(struct btrfs_root *root, struct btrfs_path *path,
+                          u64 *objectid)
+{
+       struct btrfs_key search;
+       struct btrfs_key last;
+       int err;
+
+       search.offset = (u64)-1;
+       search.type = BTRFS_DEV_ITEM_KEY;
+       search.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+
+       err = btrfs_search_slot(NULL, root, &search, path, 0, 0);
+       if (err >= 0) {
+               /* an exact match on the maximum key is treated as a bug */
+               BUG_ON(err == 0);
+
+               err = btrfs_previous_item(root, path,
+                                         BTRFS_DEV_ITEMS_OBJECTID,
+                                         BTRFS_DEV_ITEM_KEY);
+               if (err == 0) {
+                       btrfs_item_key_to_cpu(path->nodes[0], &last,
+                                             path->slots[0]);
+                       *objectid = last.offset + 1;
+               } else {
+                       *objectid = 1;
+               }
+               err = 0;
+       }
+       btrfs_release_path(root, path);
+       return err;
+}
+
+/*
+ * the device information is stored in the chunk root
+ * the btrfs_device struct should be fully filled in
+ *
+ * A new device item is inserted with the key
+ * (BTRFS_DEV_ITEMS_OBJECTID, BTRFS_DEV_ITEM_KEY, devid), where devid is
+ * the next free id reported by find_next_devid().  device->devid is
+ * overwritten with that id so the item key, the id stored inside the
+ * item and the in-memory struct all agree; btrfs_update_device() looks
+ * the item up by device->devid and depends on this.
+ *
+ * Returns 0 on success, -ENOMEM if no path could be allocated, or the
+ * error from the devid search or the item insertion.
+ */
+int btrfs_add_device(struct btrfs_trans_handle *trans,
+                    struct btrfs_root *root,
+                    struct btrfs_device *device)
+{
+       int ret;
+       struct btrfs_path *path;
+       struct btrfs_dev_item *dev_item;
+       struct extent_buffer *leaf;
+       struct btrfs_key key;
+       unsigned long ptr;
+       u64 free_devid;
+
+       root = root->fs_info->chunk_root;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       ret = find_next_devid(root, path, &free_devid);
+       if (ret)
+               goto out;
+
+       key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+       key.type = BTRFS_DEV_ITEM_KEY;
+       key.offset = free_devid;
+
+       /* the device name is stored inline, right after the fixed item */
+       ret = btrfs_insert_empty_item(trans, root, path, &key,
+                                     sizeof(*dev_item) + device->name_len);
+       if (ret)
+               goto out;
+
+       leaf = path->nodes[0];
+       dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
+
+       /* keep the in-memory id in sync with the key we just inserted */
+       device->devid = free_devid;
+       btrfs_set_device_id(leaf, dev_item, device->devid);
+       btrfs_set_device_type(leaf, dev_item, device->type);
+       btrfs_set_device_io_align(leaf, dev_item, device->io_align);
+       btrfs_set_device_io_width(leaf, dev_item, device->io_width);
+       btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
+       btrfs_set_device_rdev(leaf, dev_item, device->rdev);
+       btrfs_set_device_partition(leaf, dev_item, device->partition);
+       btrfs_set_device_name_len(leaf, dev_item, device->name_len);
+       btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
+       btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
+
+       ptr = (unsigned long)btrfs_device_name(dev_item);
+       write_extent_buffer(leaf, device->name, ptr, device->name_len);
+
+       ptr = (unsigned long)btrfs_device_uuid(dev_item);
+       write_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE);
+       btrfs_mark_buffer_dirty(leaf);
+       ret = 0;
+
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+/*
+ * copy the in-memory fields of 'device' back into its existing device
+ * item in the chunk root.  The item is located by device->devid; the name
+ * and uuid bytes stored in the item are not rewritten.
+ *
+ * Returns 0 on success, -ENOMEM if no path could be allocated, -ENOENT if
+ * the device item does not exist, or the negative error from the search.
+ */
+int btrfs_update_device(struct btrfs_trans_handle *trans,
+                       struct btrfs_device *device)
+{
+       struct btrfs_root *chunk_root = device->dev_root->fs_info->chunk_root;
+       struct btrfs_dev_item *item;
+       struct extent_buffer *eb;
+       struct btrfs_path *path;
+       struct btrfs_key key;
+       int ret;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+       key.type = BTRFS_DEV_ITEM_KEY;
+       key.offset = device->devid;
+
+       ret = btrfs_search_slot(trans, chunk_root, &key, path, 0, 1);
+       /* a positive return means the exact key was not found */
+       if (ret > 0)
+               ret = -ENOENT;
+       if (ret)
+               goto out;
+
+       eb = path->nodes[0];
+       item = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dev_item);
+
+       btrfs_set_device_id(eb, item, device->devid);
+       btrfs_set_device_type(eb, item, device->type);
+       btrfs_set_device_io_align(eb, item, device->io_align);
+       btrfs_set_device_io_width(eb, item, device->io_width);
+       btrfs_set_device_sector_size(eb, item, device->sector_size);
+       btrfs_set_device_rdev(eb, item, device->rdev);
+       btrfs_set_device_partition(eb, item, device->partition);
+       btrfs_set_device_total_bytes(eb, item, device->total_bytes);
+       btrfs_set_device_bytes_used(eb, item, device->bytes_used);
+       btrfs_mark_buffer_dirty(eb);
+
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
+/*
+ * append one (disk key, chunk item) pair to the system chunk array kept
+ * in the super block copy and grow the recorded array size to match.
+ *
+ * 'item_size' is the size of 'chunk' in bytes.  Each entry takes
+ * sizeof(struct btrfs_disk_key) + item_size bytes in the array.
+ *
+ * Returns 0 on success or -EFBIG when the whole entry (key plus item)
+ * does not fit in the remaining space.
+ */
+int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
+                          struct btrfs_root *root,
+                          struct btrfs_key *key,
+                          struct btrfs_chunk *chunk, int item_size)
+{
+       struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
+       struct btrfs_disk_key disk_key;
+       u32 array_size;
+       u8 *ptr;
+
+       array_size = btrfs_super_sys_array_size(super_copy);
+       /* the key is written in front of the item, so it must fit as well */
+       if (array_size + item_size + sizeof(disk_key) >
+           BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
+               return -EFBIG;
+
+       ptr = super_copy->sys_chunk_array + array_size;
+       btrfs_cpu_key_to_disk(&disk_key, key);
+       memcpy(ptr, &disk_key, sizeof(disk_key));
+       ptr += sizeof(disk_key);
+       memcpy(ptr, chunk, item_size);
+       item_size += sizeof(disk_key);
+       btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
+       return 0;
+}
+
+/*
+ * allocate a new single-stripe chunk and record it in the chunk tree and
+ * in the in-memory mapping tree.
+ *
+ * The logical start comes from find_next_chunk().  The backing device is
+ * the one following fs_info->last_device on the device list, and
+ * fs_info->last_device is advanced to it.  The chunk size is
+ * total_bytes minus 95% of total_bytes of that device (roughly 5%),
+ * rounded down to a multiple of the device's io_align.
+ *
+ * On success *start holds the logical start and *num_bytes the length of
+ * the chunk.  Returns 0 on success, -ENOMEM on allocation failure, -EIO
+ * if no device ended up selected, or the error from find_next_chunk().
+ * Failures of the tree updates hit BUG_ON().
+ */
+int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
+                     struct btrfs_root *extent_root, u64 *start,
+                     u64 *num_bytes, u32 type)
+{
+       u64 dev_offset;
+       struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
+       struct btrfs_stripe *stripes;
+       struct btrfs_device *device = NULL;
+       struct btrfs_chunk *chunk;
+       struct list_head *dev_list = &extent_root->fs_info->devices;
+       struct list_head *last_dev = extent_root->fs_info->last_device;
+       struct map_lookup *map;
+       /* only assigned inside the stripe loop, so give them defined values */
+       u64 physical = 0;
+       u64 calc_size = 0;
+       int num_stripes;
+       int ret;
+       int index = 0;
+       struct btrfs_key key;
+
+       ret = find_next_chunk(chunk_root, &key.objectid);
+       if (ret)
+               return ret;
+
+       num_stripes = 1;
+       chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS);
+       if (!chunk)
+               return -ENOMEM;
+
+       stripes = &chunk->stripe;
+
+       while(index < num_stripes) {
+               device = next_device(dev_list, last_dev);
+               BUG_ON(!device);
+               last_dev = &device->dev_list;
+               extent_root->fs_info->last_device = last_dev;
+
+               if (index == 0) {
+                       /* size the chunk from the first device picked */
+                       int mask = device->io_align;
+                       calc_size = (device->total_bytes * 95) / 100;
+                       calc_size = device->total_bytes - calc_size;
+                       calc_size = (calc_size / mask) * mask;
+                       *num_bytes = calc_size;
+               }
+
+               ret = btrfs_alloc_dev_extent(trans, device,
+                                            key.objectid,
+                                            calc_size, &dev_offset);
+               BUG_ON(ret);
+
+               device->bytes_used += calc_size;
+               ret = btrfs_update_device(trans, device);
+               BUG_ON(ret);
+
+               btrfs_set_stack_stripe_devid(stripes + index, device->devid);
+               btrfs_set_stack_stripe_offset(stripes + index, dev_offset);
+               physical = dev_offset;
+               index++;
+       }
+
+       /* key.objectid was set above */
+       key.offset = *num_bytes;
+       key.type = BTRFS_CHUNK_ITEM_KEY;
+       btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
+       btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024);
+       btrfs_set_stack_chunk_type(chunk, type);
+       btrfs_set_stack_chunk_num_stripes(chunk, num_stripes);
+       btrfs_set_stack_chunk_io_align(chunk, extent_root->sectorsize);
+       btrfs_set_stack_chunk_io_width(chunk, extent_root->sectorsize);
+       btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);
+
+       ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
+                               btrfs_chunk_item_size(num_stripes));
+       BUG_ON(ret);
+       *start = key.objectid;
+
+       map = kmalloc(sizeof(*map), GFP_NOFS);
+       if (!map) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       map->ce.start = key.objectid;
+       map->ce.size = key.offset;
+
+       map->physical = physical;
+       map->dev = device;
+
+       if (!map->dev) {
+               kfree(map);
+               ret = -EIO;
+               goto out;
+       }
+       ret = insert_existing_cache_extent(
+                          &extent_root->fs_info->mapping_tree.cache_tree,
+                          &map->ce);
+       BUG_ON(ret);
+
+out:
+       /* the temporary chunk item is released on every exit from here on */
+       kfree(chunk);
+       return ret;
+}
+
+/*
+ * set up an empty mapping tree by initializing its embedded cache tree.
+ */
+void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
+{
+       cache_tree_init(&tree->cache_tree);
+}
+
+/*
+ * translate a logical address into a device and a physical offset using
+ * the mapping tree.
+ *
+ * On return *dev is the mapped device, *phys the physical offset that
+ * corresponds to 'logical' and *length the number of bytes left in the
+ * mapping from 'logical' onwards.  A missing mapping, or one that starts
+ * after 'logical' or ends before it, hits BUG_ON().  Always returns 0.
+ */
+int btrfs_map_block(struct btrfs_mapping_tree *map_tree,
+                   u64 logical, u64 *phys, u64 *length,
+                   struct btrfs_device **dev)
+{
+       struct cache_extent *ce;
+       struct map_lookup *map;
+       u64 skip;
+
+       ce = find_first_cache_extent(&map_tree->cache_tree, logical);
+       BUG_ON(!ce);
+       BUG_ON(!(ce->start <= logical && logical <= ce->start + ce->size));
+
+       map = container_of(ce, struct map_lookup, ce);
+
+       /* distance of 'logical' from the start of the mapping */
+       skip = logical - ce->start;
+       *phys = map->physical + skip;
+       *length = ce->size - skip;
+       *dev = map->dev;
+       return 0;
+}
+
+/*
+ * look up a device by its btrfs device id on the fs_info->devices list.
+ * Returns the matching device, or NULL if none has that id.
+ */
+struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid)
+{
+       struct list_head *head = &root->fs_info->devices;
+       struct list_head *pos;
+
+       for (pos = head->next; pos != head; pos = pos->next) {
+               struct btrfs_device *candidate;
+
+               candidate = list_entry(pos, struct btrfs_device, dev_list);
+               if (candidate->devid == devid)
+                       return candidate;
+       }
+       return NULL;
+}
+
+/*
+ * add an in-memory mapping for one chunk item.
+ *
+ * key->objectid is the logical start of the chunk and key->offset its
+ * length.  Only stripe 0 of the chunk is consulted for the device id and
+ * the physical offset.  Nothing is done when the logical start already
+ * falls inside an existing mapping.
+ *
+ * Returns 0 on success (or when already mapped), -ENOMEM on allocation
+ * failure and -EIO when the stripe's device is not on the device list.
+ */
+static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
+                         struct extent_buffer *leaf,
+                         struct btrfs_chunk *chunk)
+{
+       struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
+       u64 chunk_start = key->objectid;
+       u64 chunk_len = key->offset;
+       struct cache_extent *ce;
+       struct map_lookup *map;
+       u64 devid;
+       int ret;
+
+       /* already mapped? */
+       ce = find_first_cache_extent(&map_tree->cache_tree, chunk_start);
+       if (ce && ce->start <= chunk_start &&
+           chunk_start < ce->start + ce->size)
+               return 0;
+
+       map = kmalloc(sizeof(*map), GFP_NOFS);
+       if (!map)
+               return -ENOMEM;
+
+       map->ce.start = chunk_start;
+       map->ce.size = chunk_len;
+       map->physical = btrfs_stripe_offset_nr(leaf, chunk, 0);
+
+       devid = btrfs_stripe_devid_nr(leaf, chunk, 0);
+       map->dev = btrfs_find_device(root, devid);
+       if (!map->dev) {
+               kfree(map);
+               return -EIO;
+       }
+
+       ret = insert_existing_cache_extent(&map_tree->cache_tree, &map->ce);
+       BUG_ON(ret);
+
+       return 0;
+}
+
+/*
+ * populate 'device' from the on-disk device item 'dev_item' in 'leaf'.
+ *
+ * The scalar fields and the uuid are copied first; the name is then read
+ * into a freshly kmalloc'ed, NUL-terminated buffer stored in
+ * device->name.
+ *
+ * Returns 0 on success or -ENOMEM if the name buffer cannot be
+ * allocated, in which case device->name is left untouched.
+ */
+static int fill_device_from_item(struct extent_buffer *leaf,
+                                struct btrfs_dev_item *dev_item,
+                                struct btrfs_device *device)
+{
+       unsigned long src;
+       char *buf;
+
+       device->devid = btrfs_device_id(leaf, dev_item);
+       device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
+       device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
+       device->type = btrfs_device_type(leaf, dev_item);
+       device->io_align = btrfs_device_io_align(leaf, dev_item);
+       device->io_width = btrfs_device_io_width(leaf, dev_item);
+       device->sector_size = btrfs_device_sector_size(leaf, dev_item);
+       device->rdev = btrfs_device_rdev(leaf, dev_item);
+       device->partition = btrfs_device_partition(leaf, dev_item);
+       device->name_len = btrfs_device_name_len(leaf, dev_item);
+
+       src = (unsigned long)btrfs_device_uuid(dev_item);
+       read_extent_buffer(leaf, device->uuid, src, BTRFS_DEV_UUID_SIZE);
+
+       /* one extra byte for the terminating NUL */
+       buf = kmalloc(device->name_len + 1, GFP_NOFS);
+       if (!buf)
+               return -ENOMEM;
+
+       src = (unsigned long)btrfs_device_name(dev_item);
+       read_extent_buffer(leaf, buf, src, device->name_len);
+       buf[device->name_len] = '\0';
+       device->name = buf;
+       return 0;
+}
+
+/*
+ * create the in-memory device for one on-disk device item, link it on
+ * fs_info->devices and open it.
+ *
+ * Nothing is done when a device with the same devid is already on the
+ * list.  'key' is copied into device->dev_key.
+ *
+ * Returns 0 on success (or when the device is already known), -ENOMEM on
+ * allocation failure, or the error from btrfs_open_device().  On any
+ * failure the partially built device is unlinked and freed, so the list
+ * never holds a dangling entry.
+ */
+static int read_one_dev(struct btrfs_root *root, struct btrfs_key *key,
+                       struct extent_buffer *leaf,
+                       struct btrfs_dev_item *dev_item)
+{
+       struct btrfs_device *device;
+       u64 devid;
+       int ret;
+
+       devid = btrfs_device_id(leaf, dev_item);
+       if (btrfs_find_device(root, devid))
+               return 0;
+
+       device = kmalloc(sizeof(*device), GFP_NOFS);
+       if (!device)
+               return -ENOMEM;
+
+       /* on failure device->name was not allocated, only free the struct */
+       ret = fill_device_from_item(leaf, dev_item, device);
+       if (ret) {
+               kfree(device);
+               return ret;
+       }
+       device->dev_root = root->fs_info->dev_root;
+       device->fd = 0;
+       list_add(&device->dev_list, &root->fs_info->devices);
+       memcpy(&device->dev_key, key, sizeof(*key));
+
+       ret = btrfs_open_device(device);
+       if (ret) {
+               /* take it off the list before the memory goes away */
+               list_del(&device->dev_list);
+               kfree(device->name);
+               kfree(device);
+       }
+       return ret;
+}
+
+/*
+ * load the devices and chunk mappings described by the system chunk
+ * array of the super block.
+ *
+ * The array is a packed sequence of (disk key, item) entries.  It is
+ * walked twice, once for the device items and once for all of the
+ * chunks, so that device structs are filled in before any chunk is
+ * mapped.  An entry of any other kind, or a failure to load an entry,
+ * hits BUG().  Always returns 0.
+ */
+int btrfs_read_sys_array(struct btrfs_root *root)
+{
+       struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
+       struct extent_buffer *sb = root->fs_info->sb_buffer;
+       u32 array_size = btrfs_super_sys_array_size(super_copy);
+       int dev_only;
+       int ret;
+
+       /* dev_only == 1 is the device pass, dev_only == 0 the chunk pass */
+       for (dev_only = 1; dev_only >= 0; dev_only--) {
+               /*
+                * ptr walks the raw array in the super block copy, sb_ptr
+                * tracks the same position as an offset handed to the
+                * extent buffer accessors together with 'sb'
+                */
+               u8 *ptr = super_copy->sys_chunk_array;
+               unsigned long sb_ptr = offsetof(struct btrfs_super_block,
+                                               sys_chunk_array);
+               u32 cur = 0;
+
+               while (cur < array_size) {
+                       struct btrfs_disk_key *disk_key;
+                       struct btrfs_key key;
+                       u32 len;
+
+                       disk_key = (struct btrfs_disk_key *)ptr;
+                       btrfs_disk_key_to_cpu(&key, disk_key);
+
+                       /* step over the key to reach the item itself */
+                       len = sizeof(*disk_key);
+                       ptr += len;
+                       sb_ptr += len;
+                       cur += len;
+
+                       if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID &&
+                           key.type == BTRFS_DEV_ITEM_KEY) {
+                               struct btrfs_dev_item *dev_item;
+
+                               dev_item = (struct btrfs_dev_item *)sb_ptr;
+                               if (dev_only) {
+                                       ret = read_one_dev(root, &key, sb,
+                                                          dev_item);
+                                       BUG_ON(ret);
+                               }
+                               /* the name follows the fixed part inline */
+                               len = sizeof(*dev_item);
+                               len += btrfs_device_name_len(sb, dev_item);
+                       } else if (key.type == BTRFS_CHUNK_ITEM_KEY) {
+                               struct btrfs_chunk *chunk;
+                               u32 num_stripes;
+
+                               chunk = (struct btrfs_chunk *)sb_ptr;
+                               if (!dev_only) {
+                                       ret = read_one_chunk(root, &key, sb,
+                                                            chunk);
+                                       BUG_ON(ret);
+                               }
+                               num_stripes = btrfs_chunk_num_stripes(sb,
+                                                                     chunk);
+                               len = btrfs_chunk_item_size(num_stripes);
+                       } else {
+                               BUG();
+                       }
+
+                       /* advance to the next entry */
+                       ptr += len;
+                       sb_ptr += len;
+                       cur += len;
+               }
+       }
+       return 0;
+}
+
+/*
+ * read every device item and every chunk item from the chunk root and
+ * build the matching in-memory devices and chunk mappings.
+ *
+ * Returns 0 on success, -ENOMEM if no path could be allocated, or the
+ * error from the tree search, the leaf walk or read_one_chunk().  A
+ * failure to read a device item hits BUG_ON().  The path is freed on
+ * every exit.
+ */
+int btrfs_read_chunk_tree(struct btrfs_root *root)
+{
+       struct btrfs_path *path;
+       struct extent_buffer *leaf;
+       struct btrfs_key key;
+       struct btrfs_key found_key;
+       int ret;
+       int slot;
+
+       root = root->fs_info->chunk_root;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       /* first we search for all of the device items, and then we
+        * read in all of the chunk items.  This way we can create chunk
+        * mappings that reference all of the devices that are found
+        */
+       key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+       key.offset = 0;
+       key.type = 0;
+again:
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       if (ret < 0)
+               goto error;
+       while(1) {
+               leaf = path->nodes[0];
+               slot = path->slots[0];
+               if (slot >= btrfs_header_nritems(leaf)) {
+                       ret = btrfs_next_leaf(root, path);
+                       if (ret == 0)
+                               continue;
+                       if (ret < 0)
+                               goto error;
+                       break;
+               }
+               btrfs_item_key_to_cpu(leaf, &found_key, slot);
+               if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
+                       /* device pass: stop at the first foreign objectid */
+                       if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID)
+                               break;
+                       if (found_key.type == BTRFS_DEV_ITEM_KEY) {
+                               struct btrfs_dev_item *dev_item;
+                               dev_item = btrfs_item_ptr(leaf, slot,
+                                                 struct btrfs_dev_item);
+                               ret = read_one_dev(root, &found_key, leaf,
+                                                  dev_item);
+                               BUG_ON(ret);
+                       }
+               } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
+                       struct btrfs_chunk *chunk;
+                       chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
+                       ret = read_one_chunk(root, &found_key, leaf, chunk);
+                       /* an unmapped chunk must not be silently skipped */
+                       if (ret)
+                               goto error;
+               }
+               path->slots[0]++;
+       }
+       if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
+               /* devices are loaded, rescan from the start for the chunks */
+               key.objectid = 0;
+               btrfs_release_path(root, path);
+               goto again;
+       }
+
+       ret = 0;
+error:
+       btrfs_free_path(path);
+       return ret;
+}
+
diff --git a/volumes.h b/volumes.h
new file mode 100644 (file)
index 0000000..fb29b54
--- /dev/null
+++ b/volumes.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __BTRFS_VOLUMES_
+#define __BTRFS_VOLUMES_
+struct btrfs_device {
+       struct list_head dev_list;      /* entry on fs_info->devices */
+       struct btrfs_root *dev_root;    /* set to fs_info->dev_root on read */
+       struct btrfs_key dev_key;       /* copy of the device item's key */
+
+       int fd;         /* presumably the open file descriptor -- confirm */
+
+       /* the internal btrfs device id */
+       u64 devid;
+
+       /* size of the device */
+       u64 total_bytes;
+
+       /* bytes used */
+       u64 bytes_used;
+
+       /* optimal io alignment for this device */
+       u32 io_align;
+
+       /* optimal io width for this device */
+       u32 io_width;
+
+       /* minimal io size for this device */
+       u32 sector_size;
+
+       /* the kernel device number */
+       u64 rdev;
+
+       /* type and info about this device */
+       u64 type;
+
+       /* partition number, 0 for whole dev */
+       int partition;
+
+       /* length of the name data at the end of the item */
+       int name_len;
+
+       /* physical drive uuid (or lvm uuid) */
+       u8 uuid[BTRFS_DEV_UUID_SIZE];
+
+       char *name;     /* name_len bytes; NUL-terminated when read from disk */
+};
+
+int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
+                          struct btrfs_device *device,
+                          u64 owner, u64 num_bytes, u64 *start);
+int btrfs_map_block(struct btrfs_mapping_tree *map_tree,
+                   u64 logical, u64 *phys, u64 *length,
+                   struct btrfs_device **dev);
+int btrfs_read_sys_array(struct btrfs_root *root);
+int btrfs_read_chunk_tree(struct btrfs_root *root);
+int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
+                     struct btrfs_root *extent_root, u64 *start,
+                     u64 *num_bytes, u32 type);
+#endif