From 510be296772108b6d5ae829d76cef2b1a1690262 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 24 Mar 2008 15:03:18 -0400 Subject: [PATCH] Add support for multiple devices per filesystem --- Makefile | 1 + convert.c | 8 +- ctree.c | 38 ++- ctree.h | 321 +++++++++++++++++++++--- debug-tree.c | 12 +- disk-io.c | 144 +++++++++-- disk-io.h | 2 + extent-cache.c | 5 +- extent-tree.c | 423 +++++++++++-------------------- extent_io.c | 4 +- mkfs.c | 45 +++- print-tree.c | 53 +++- utils.c | 160 ++++++++++-- utils.h | 8 +- volumes.c | 771 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ volumes.h | 75 ++++++ 16 files changed, 1712 insertions(+), 358 deletions(-) create mode 100644 volumes.c create mode 100644 volumes.h diff --git a/Makefile b/Makefile index 4cabeef..ff7ddad 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,7 @@ CFLAGS = -g -Werror objects = ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ root-tree.o dir-item.o hash.o file-item.o inode-item.o \ inode-map.o crc32c.o rbtree.o extent-cache.o extent_io.o \ + volumes.o # CHECKFLAGS=-D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise \ -Wuninitialized -Wshadow -Wundef diff --git a/convert.c b/convert.c index d407bb8..83928f4 100644 --- a/convert.c +++ b/convert.c @@ -341,7 +341,7 @@ static int record_file_extent(struct btrfs_trans_handle *trans, if (ret) goto fail; ret = btrfs_update_block_group(trans, root, disk_bytenr, - num_bytes, 1, 0, 1); + num_bytes, 1, 0); } else if (ret == -EEXIST) { ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, root->root_key.objectid, @@ -1432,7 +1432,7 @@ static int fixup_block_accounting(struct btrfs_trans_handle *trans, if (key.type == BTRFS_EXTENT_ITEM_KEY) { bytes_used += key.offset; ret = btrfs_update_block_group(trans, root, - key.objectid, key.offset, 1, 0, 1); + key.objectid, key.offset, 1, 0); BUG_ON(ret); } path.slots[0]++; @@ -1547,7 +1547,7 @@ int do_convert(const char *devname, int datacsum, int packing, int noxattr) { int i, 
fd, ret; u32 blocksize; - u64 blocks[4]; + u64 blocks[6]; u64 total_bytes; u64 super_bytenr; ext2_filsys ext2_fs; @@ -1656,7 +1656,7 @@ int do_rollback(const char *devname, int force) struct btrfs_dir_item *dir; struct btrfs_inode_item *inode; struct btrfs_file_extent_item *fi; - struct btrfs_inode_timespec *tspec; + struct btrfs_timespec *tspec; struct extent_buffer *leaf; struct btrfs_key key; struct btrfs_path path; diff --git a/ctree.c b/ctree.c index 0e2fe7f..88ebd9e 100644 --- a/ctree.c +++ b/ctree.c @@ -68,6 +68,14 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) memset(p, 0, sizeof(*p)); } +static void add_root_to_dirty_list(struct btrfs_root *root) +{ + if (root->track_dirty && list_empty(&root->dirty_list)) { + list_add(&root->dirty_list, + &root->fs_info->dirty_cowonly_roots); + } +} + int btrfs_copy_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, @@ -194,6 +202,7 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, root_gen, 0, 0, 1); } free_extent_buffer(buf); + add_root_to_dirty_list(root); } else { root_gen = btrfs_header_generation(parent); btrfs_set_node_blockptr(parent, parent_slot, @@ -237,7 +246,7 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, return 0; } - search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1); + search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); ret = __btrfs_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0); return ret; @@ -690,6 +699,7 @@ static int balance_level(struct btrfs_trans_handle *trans, BUG_ON(ret); root->node = child; + add_root_to_dirty_list(root); path->nodes[level] = NULL; clean_tree_block(trans, root, mid); wait_on_tree_block_writeback(root, mid); @@ -1338,6 +1348,7 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans, /* the super has an extra ref to root->node */ free_extent_buffer(root->node); root->node = c; + add_root_to_dirty_list(root); extent_buffer_get(c); 
path->nodes[level] = c; path->slots[level] = 0; @@ -2619,3 +2630,28 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) } return 0; } + +int btrfs_previous_item(struct btrfs_root *root, + struct btrfs_path *path, u64 min_objectid, + int type) +{ + struct btrfs_key found_key; + struct extent_buffer *leaf; + int ret; + + while(1) { + if (path->slots[0] == 0) { + ret = btrfs_prev_leaf(root, path); + if (ret != 0) + return ret; + } else { + path->slots[0]--; + } + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.type == type) + return 0; + } + return 1; +} + diff --git a/ctree.h b/ctree.h index 113d38f..c858514 100644 --- a/ctree.h +++ b/ctree.h @@ -30,12 +30,44 @@ struct btrfs_trans_handle; #define BTRFS_MAGIC "_B4RfS_M" #define BTRFS_MAX_LEVEL 8 + +/* holds pointers to all of the tree roots */ #define BTRFS_ROOT_TREE_OBJECTID 1ULL + +/* stores information about which extents are in use, and reference counts */ #define BTRFS_EXTENT_TREE_OBJECTID 2ULL + +/* one per subvolume, storing files and directories */ #define BTRFS_FS_TREE_OBJECTID 3ULL + +/* directory objectid inside the root tree */ #define BTRFS_ROOT_TREE_DIR_OBJECTID 4ULL + + +/* + * chunk tree stores translations from logical -> physical block numbering + * the super block points to the chunk tree + */ +#define BTRFS_CHUNK_TREE_OBJECTID 5ULL + +/* + * stores information about which areas of a given device are in use. + * one per device. The tree of tree roots points to the device tree + */ +#define BTRFS_DEV_TREE_OBJECTID 6ULL + +/* + * All files have objectids higher than this. + */ #define BTRFS_FIRST_FREE_OBJECTID 256ULL + +/* + * the device items go into the chunk tree. 
The key is in the form + * [ 1 BTRFS_DEV_ITEM_KEY device_id ] + */ +#define BTRFS_DEV_ITEMS_OBJECTID 1ULL + /* * we can actually store much bigger names, but lets not confuse the rest * of linux @@ -85,6 +117,81 @@ struct btrfs_key { u64 offset; } __attribute__ ((__packed__)); +struct btrfs_mapping_tree { + struct cache_tree cache_tree; +}; + +#define BTRFS_DEV_UUID_SIZE 16 +struct btrfs_dev_item { + /* the internal btrfs device id */ + __le64 devid; + + /* size of the device */ + __le64 total_bytes; + + /* bytes used */ + __le64 bytes_used; + + /* optimal io alignment for this device */ + __le32 io_align; + + /* optimal io width for this device */ + __le32 io_width; + + /* minimal io size for this device */ + __le32 sector_size; + + /* the kernel device number */ + __le64 rdev; + + /* type and info about this device */ + __le64 type; + + /* partition number, 0 for whole dev */ + __le32 partition; + + /* length of the name data at the end of the item */ + __le16 name_len; + + /* physical drive uuid (or lvm uuid) */ + u8 uuid[BTRFS_DEV_UUID_SIZE]; + /* name goes here */ +} __attribute__ ((__packed__)); + +struct btrfs_stripe { + __le64 devid; + __le64 offset; +} __attribute__ ((__packed__)); + +struct btrfs_chunk { + __le64 owner; + __le64 stripe_len; + __le64 type; + + /* optimal io alignment for this chunk */ + __le32 io_align; + + /* optimal io width for this chunk */ + __le32 io_width; + + /* minimal io size for this chunk */ + __le32 sector_size; + + /* 2^16 stripes is quite a lot, a second limit is the size of a single + * item in the btree + */ + __le16 num_stripes; + struct btrfs_stripe stripe; + /* additional stripes go here */ +} __attribute__ ((__packed__)); + +static inline unsigned long btrfs_chunk_item_size(int num_stripes) +{ + BUG_ON(num_stripes == 0); + return sizeof(struct btrfs_chunk) + + sizeof(struct btrfs_stripe) * (num_stripes - 1); +} + #define BTRFS_FSID_SIZE 16 /* * every tree block (leaf or node) starts with this header. 
@@ -108,6 +215,13 @@ struct btrfs_header { #define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ sizeof(struct btrfs_item) - \ sizeof(struct btrfs_file_extent_item)) + +/* + * this is a very generous portion of the super block, giving us + * room to translate 14 chunks with 3 stripes each. + */ +#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048 + /* * the super block basically lists the main trees of the FS * it currently lacks any block count etc etc @@ -120,6 +234,7 @@ struct btrfs_super_block { __le64 magic; __le64 generation; __le64 root; + __le64 chunk_root; __le64 total_bytes; __le64 bytes_used; __le64 root_dir_objectid; @@ -127,7 +242,10 @@ struct btrfs_super_block { __le32 nodesize; __le32 leafsize; __le32 stripesize; + __le32 sys_chunk_array_size; u8 root_level; + u8 chunk_root_level; + u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; } __attribute__ ((__packed__)); /* @@ -197,12 +315,22 @@ struct btrfs_extent_ref { __le64 offset; } __attribute__ ((__packed__)); +/* dev extents record free space on individual devices. 
The owner + * field points back to the chunk allocation mapping tree that allocated + * the extent + */ +struct btrfs_dev_extent { + __le64 owner; + __le64 length; +} __attribute__ ((__packed__)); + + struct btrfs_inode_ref { __le16 name_len; /* name goes here */ } __attribute__ ((__packed__)); -struct btrfs_inode_timespec { +struct btrfs_timespec { __le64 sec; __le32 nsec; } __attribute__ ((__packed__)); @@ -220,13 +348,13 @@ struct btrfs_inode_item { __le32 uid; __le32 gid; __le32 mode; - __le32 rdev; + __le64 rdev; __le16 flags; __le16 compat_flags; - struct btrfs_inode_timespec atime; - struct btrfs_inode_timespec ctime; - struct btrfs_inode_timespec mtime; - struct btrfs_inode_timespec otime; + struct btrfs_timespec atime; + struct btrfs_timespec ctime; + struct btrfs_timespec mtime; + struct btrfs_timespec otime; } __attribute__ ((__packed__)); struct btrfs_dir_item { @@ -280,24 +408,26 @@ struct btrfs_csum_item { } __attribute__ ((__packed__)); /* tag for the radix tree of block groups in ram */ -#define BTRFS_BLOCK_GROUP_SIZE (256 * 1024 * 1024) - -#define BTRFS_BLOCK_GROUP_DATA 1 -#define BTRFS_BLOCK_GROUP_MIXED 2 +#define BTRFS_BLOCK_GROUP_DATA (1 << 0) +#define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1) +#define BTRFS_BLOCK_GROUP_METADATA (1 << 2) struct btrfs_block_group_item { __le64 used; - u8 flags; + __le64 chunk_tree; + __le64 chunk_objectid; + __le64 flags; } __attribute__ ((__packed__)); struct btrfs_block_group_cache { struct cache_extent cache; struct btrfs_key key; struct btrfs_block_group_item item; - int data; - int cached; u64 pinned; + u64 flags; + int cached; }; + struct btrfs_extent_ops { int (*alloc_extent)(struct btrfs_root *root, u64 num_bytes, u64 hint_byte, struct btrfs_key *ins); @@ -305,11 +435,14 @@ struct btrfs_extent_ops { u64 num_bytes); }; +struct btrfs_device; struct btrfs_fs_info { u8 fsid[BTRFS_FSID_SIZE]; struct btrfs_root *fs_root; struct btrfs_root *extent_root; struct btrfs_root *tree_root; + struct btrfs_root *chunk_root; + 
struct btrfs_root *dev_root; struct extent_io_tree extent_cache; struct extent_io_tree free_space_cache; @@ -318,18 +451,27 @@ struct btrfs_fs_info { struct extent_io_tree pending_del; struct extent_io_tree extent_ins; + /* logical->physical extent mapping */ + struct btrfs_mapping_tree mapping_tree; + u64 generation; u64 last_trans_committed; struct btrfs_trans_handle *running_transaction; struct btrfs_super_block super_copy; struct extent_buffer *sb_buffer; struct mutex fs_mutex; - int fp; u64 total_pinned; struct btrfs_extent_ops *extent_ops; + struct list_head dirty_cowonly_roots; + + struct list_head devices; + struct list_head *last_device; + int fp; + int force_system_allocs; void *priv_data; }; + /* * in ram representation of the tree. extent_root is used for all allocations * and for the extent tree extent_root root. @@ -356,10 +498,15 @@ struct btrfs_root { u32 stripesize; int ref_cows; + int track_dirty; + u32 type; u64 highest_inode; u64 last_inode_alloc; + + /* the dirty list is only used by non-reference counted roots */ + struct list_head dirty_list; }; /* @@ -408,6 +555,10 @@ struct btrfs_root { */ #define BTRFS_BLOCK_GROUP_ITEM_KEY 50 +#define BTRFS_DEV_EXTENT_KEY 75 +#define BTRFS_DEV_ITEM_KEY 76 +#define BTRFS_CHUNK_ITEM_KEY 77 + /* * string items are for debugging. 
They just store a short string of * data in the FS @@ -471,11 +622,104 @@ static inline void btrfs_set_##name(type *s, u##bits val) \ s->member = cpu_to_le##bits(val); \ } +BTRFS_SETGET_FUNCS(device_type, struct btrfs_dev_item, type, 64); +BTRFS_SETGET_FUNCS(device_total_bytes, struct btrfs_dev_item, total_bytes, 64); +BTRFS_SETGET_FUNCS(device_bytes_used, struct btrfs_dev_item, bytes_used, 64); +BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32); +BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32); +BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32); +BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64); +BTRFS_SETGET_FUNCS(device_rdev, struct btrfs_dev_item, rdev, 64); +BTRFS_SETGET_FUNCS(device_partition, struct btrfs_dev_item, partition, 32); +BTRFS_SETGET_FUNCS(device_name_len, struct btrfs_dev_item, name_len, 16); + +static inline char *btrfs_device_uuid(struct btrfs_dev_item *d) +{ + return (char *)d + offsetof(struct btrfs_dev_item, uuid); +} + +static inline char *btrfs_device_name(struct btrfs_dev_item *d) +{ + return (char *)(d + 1); +} + +BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64); +BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64); +BTRFS_SETGET_FUNCS(chunk_io_align, struct btrfs_chunk, io_align, 32); +BTRFS_SETGET_FUNCS(chunk_io_width, struct btrfs_chunk, io_width, 32); +BTRFS_SETGET_FUNCS(chunk_sector_size, struct btrfs_chunk, sector_size, 32); +BTRFS_SETGET_FUNCS(chunk_type, struct btrfs_chunk, type, 64); +BTRFS_SETGET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16); +BTRFS_SETGET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64); +BTRFS_SETGET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64); + +BTRFS_SETGET_STACK_FUNCS(stack_chunk_owner, struct btrfs_chunk, owner, 64); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_stripe_len, struct btrfs_chunk, + stripe_len, 64); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_align, 
struct btrfs_chunk, + io_align, 32); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_width, struct btrfs_chunk, + io_width, 32); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_sector_size, struct btrfs_chunk, + sector_size, 32); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_type, struct btrfs_chunk, type, 64); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_num_stripes, struct btrfs_chunk, + num_stripes, 16); +BTRFS_SETGET_STACK_FUNCS(stack_stripe_devid, struct btrfs_stripe, devid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_stripe_offset, struct btrfs_stripe, offset, 64); + +static inline struct btrfs_stripe *btrfs_stripe_nr(struct btrfs_chunk *c, + int nr) +{ + unsigned long offset = (unsigned long)c; + offset += offsetof(struct btrfs_chunk, stripe); + offset += nr * sizeof(struct btrfs_stripe); + return (struct btrfs_stripe *)offset; +} + +static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb, + struct btrfs_chunk *c, int nr) +{ + return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr)); +} + +static inline void btrfs_set_stripe_offset_nr(struct extent_buffer *eb, + struct btrfs_chunk *c, int nr, + u64 val) +{ + btrfs_set_stripe_offset(eb, btrfs_stripe_nr(c, nr), val); +} + +static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb, + struct btrfs_chunk *c, int nr) +{ + return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr)); +} + +static inline void btrfs_set_stripe_devid_nr(struct extent_buffer *eb, + struct btrfs_chunk *c, int nr, + u64 val) +{ + btrfs_set_stripe_devid(eb, btrfs_stripe_nr(c, nr), val); +} + /* struct btrfs_block_group_item */ BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item, used, 64); BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item, used, 64); +BTRFS_SETGET_STACK_FUNCS(block_group_chunk_tree, struct btrfs_block_group_item, + chunk_tree, 64); +BTRFS_SETGET_FUNCS(disk_block_group_chunk_tree, struct btrfs_block_group_item, + chunk_tree, 64); +BTRFS_SETGET_STACK_FUNCS(block_group_chunk_objectid, + struct 
btrfs_block_group_item, chunk_objectid, 64); +BTRFS_SETGET_FUNCS(disk_block_group_chunk_objecitd, + struct btrfs_block_group_item, chunk_objectid, 64); +BTRFS_SETGET_FUNCS(disk_block_group_flags, + struct btrfs_block_group_item, flags, 64); +BTRFS_SETGET_STACK_FUNCS(block_group_flags, + struct btrfs_block_group_item, flags, 64); /* struct btrfs_inode_ref */ BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); @@ -489,7 +733,7 @@ BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32); BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32); BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32); BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32); -BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 32); +BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64); BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 16); BTRFS_SETGET_FUNCS(inode_compat_flags, struct btrfs_inode_item, compat_flags, 16); @@ -511,51 +755,55 @@ BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32); BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, - struct btrfs_inode_item, rdev, 32); + struct btrfs_inode_item, rdev, 64); BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 16); BTRFS_SETGET_STACK_FUNCS(stack_inode_compat_flags, struct btrfs_inode_item, compat_flags, 16); -static inline struct btrfs_inode_timespec * +static inline struct btrfs_timespec * btrfs_inode_atime(struct btrfs_inode_item *inode_item) { unsigned long ptr = (unsigned long)inode_item; ptr += offsetof(struct btrfs_inode_item, atime); - return (struct btrfs_inode_timespec *)ptr; + return (struct btrfs_timespec *)ptr; } -static inline struct btrfs_inode_timespec * +static inline struct btrfs_timespec * btrfs_inode_mtime(struct btrfs_inode_item *inode_item) { unsigned long ptr = (unsigned long)inode_item; ptr += offsetof(struct 
btrfs_inode_item, mtime); - return (struct btrfs_inode_timespec *)ptr; + return (struct btrfs_timespec *)ptr; } -static inline struct btrfs_inode_timespec * +static inline struct btrfs_timespec * btrfs_inode_ctime(struct btrfs_inode_item *inode_item) { unsigned long ptr = (unsigned long)inode_item; ptr += offsetof(struct btrfs_inode_item, ctime); - return (struct btrfs_inode_timespec *)ptr; + return (struct btrfs_timespec *)ptr; } -static inline struct btrfs_inode_timespec * +static inline struct btrfs_timespec * btrfs_inode_otime(struct btrfs_inode_item *inode_item) { unsigned long ptr = (unsigned long)inode_item; ptr += offsetof(struct btrfs_inode_item, otime); - return (struct btrfs_inode_timespec *)ptr; + return (struct btrfs_timespec *)ptr; } -BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_inode_timespec, sec, 64); -BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_inode_timespec, nsec, 32); -BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_inode_timespec, +BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64); +BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32); +BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64); -BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_inode_timespec, +BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32); +/* struct btrfs_dev_extent */ +BTRFS_SETGET_FUNCS(dev_extent_owner, struct btrfs_dev_extent, owner, 64); +BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64); + /* struct btrfs_extent_item */ BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32); @@ -830,8 +1078,14 @@ BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, generation, 64); BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64); +BTRFS_SETGET_STACK_FUNCS(super_sys_array_size, + struct btrfs_super_block, sys_chunk_array_size, 
32); BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block, root_level, 8); +BTRFS_SETGET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block, + chunk_root, 64); +BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block, + chunk_root_level, 8); BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block, total_bytes, 64); BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block, @@ -963,14 +1217,19 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_read_block_groups(struct btrfs_root *root); -int btrfs_make_block_groups(struct btrfs_trans_handle *trans, - struct btrfs_root *root); +int btrfs_make_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytes_used, + u64 type, u64 chunk_tree, u64 chunk_objectid, + u64 size); u64 btrfs_hash_extent_ref(u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset); int btrfs_update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num, - int alloc, int mark_free, int data); + int alloc, int mark_free); /* ctree.c */ +int btrfs_previous_item(struct btrfs_root *root, + struct btrfs_path *path, u64 min_objectid, + int type); int btrfs_comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2); int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, diff --git a/debug-tree.c b/debug-tree.c index 7a2d0d0..acfa442 100644 --- a/debug-tree.c +++ b/debug-tree.c @@ -139,6 +139,10 @@ int main(int ac, char **av) printf("root tree\n"); btrfs_print_tree(root->fs_info->tree_root, root->fs_info->tree_root->node); + + printf("chunk tree\n"); + btrfs_print_tree(root->fs_info->chunk_root, + root->fs_info->chunk_root->node); } btrfs_init_path(&path); key.offset = 0; @@ -170,7 +174,13 @@ int main(int ac, char **av) root->leafsize); switch(found_key.objectid) { 
case BTRFS_ROOT_TREE_OBJECTID: - printf("root "); + if (!skip) + printf("root "); + break; + case BTRFS_DEV_TREE_OBJECTID: + if (!skip) { + printf("device extent tree "); + } break; case BTRFS_EXTENT_TREE_OBJECTID: skip = 0; diff --git a/disk-io.c b/disk-io.c index 0957c63..571cb09 100644 --- a/disk-io.c +++ b/disk-io.c @@ -28,14 +28,30 @@ #include "radix-tree.h" #include "ctree.h" #include "disk-io.h" +#include "volumes.h" #include "transaction.h" #include "crc32c.h" +int btrfs_open_device(struct btrfs_device *dev) +{ + dev->fd = open(dev->name, O_RDWR, 0600); + BUG_ON(dev->fd < 0); + return 0; +} + int btrfs_map_bh_to_logical(struct btrfs_root *root, struct extent_buffer *buf, u64 logical) { - buf->fd = root->fs_info->fp; - buf->dev_bytenr = logical; + u64 physical; + u64 length; + struct btrfs_device *device; + int ret; + + ret = btrfs_map_block(&root->fs_info->mapping_tree, logical, &physical, + &length, &device); + BUG_ON(ret); + buf->fd = device->fd; + buf->dev_bytenr = physical; return 0; } @@ -146,39 +162,56 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->leafsize = leafsize; root->stripesize = stripesize; root->ref_cows = 0; + root->track_dirty = 0; + root->fs_info = fs_info; root->objectid = objectid; root->last_trans = 0; root->highest_inode = 0; root->last_inode_alloc = 0; + + INIT_LIST_HEAD(&root->dirty_list); memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); root->root_key.objectid = objectid; return 0; } -static int commit_tree_roots(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) +static int update_cowonly_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root) { int ret; - u64 old_extent_bytenr; - struct btrfs_root *tree_root = fs_info->tree_root; - struct btrfs_root *extent_root = fs_info->extent_root; + u64 old_root_bytenr; + struct btrfs_root *tree_root = root->fs_info->tree_root; - btrfs_write_dirty_block_groups(trans, 
fs_info->extent_root); + btrfs_write_dirty_block_groups(trans, root); while(1) { - old_extent_bytenr = btrfs_root_bytenr(&extent_root->root_item); - if (old_extent_bytenr == extent_root->node->start) + old_root_bytenr = btrfs_root_bytenr(&root->root_item); + if (old_root_bytenr == root->node->start) break; - btrfs_set_root_bytenr(&extent_root->root_item, - extent_root->node->start); - extent_root->root_item.level = - btrfs_header_level(extent_root->node); + btrfs_set_root_bytenr(&root->root_item, + root->node->start); + root->root_item.level = btrfs_header_level(root->node); ret = btrfs_update_root(trans, tree_root, - &extent_root->root_key, - &extent_root->root_item); + &root->root_key, + &root->root_item); BUG_ON(ret); - btrfs_write_dirty_block_groups(trans, fs_info->extent_root); + btrfs_write_dirty_block_groups(trans, root); + } + return 0; +} + +static int commit_tree_roots(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) +{ + struct btrfs_root *root; + struct list_head *next; + + while(!list_empty(&fs_info->dirty_cowonly_roots)) { + next = fs_info->dirty_cowonly_roots.next; + list_del_init(next); + root = list_entry(next, struct btrfs_root, dirty_list); + update_cowonly_root(trans, root); } return 0; } @@ -384,6 +417,8 @@ struct btrfs_root *open_ctree_fd(int fp, u64 sb_bytenr) struct btrfs_root *root = malloc(sizeof(struct btrfs_root)); struct btrfs_root *tree_root = malloc(sizeof(struct btrfs_root)); struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root)); + struct btrfs_root *chunk_root = malloc(sizeof(struct btrfs_root)); + struct btrfs_root *dev_root = malloc(sizeof(struct btrfs_root)); struct btrfs_fs_info *fs_info = malloc(sizeof(*fs_info)); int ret; struct btrfs_super_block *disk_super; @@ -398,6 +433,10 @@ struct btrfs_root *open_ctree_fd(int fp, u64 sb_bytenr) fs_info->extent_root = extent_root; fs_info->extent_ops = NULL; fs_info->priv_data = NULL; + fs_info->chunk_root = chunk_root; + fs_info->dev_root = dev_root; + 
fs_info->force_system_allocs = 0; + extent_io_tree_init(&fs_info->extent_cache); extent_io_tree_init(&fs_info->free_space_cache); extent_io_tree_init(&fs_info->block_group_cache); @@ -405,13 +444,25 @@ struct btrfs_root *open_ctree_fd(int fp, u64 sb_bytenr) extent_io_tree_init(&fs_info->pending_del); extent_io_tree_init(&fs_info->extent_ins); + cache_tree_init(&fs_info->mapping_tree.cache_tree); + mutex_init(&fs_info->fs_mutex); + INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); + INIT_LIST_HEAD(&fs_info->devices); + fs_info->last_device = &fs_info->devices; - __setup_root(512, 512, 512, 512, tree_root, + __setup_root(4096, 4096, 4096, 4096, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); - fs_info->sb_buffer = read_tree_block(tree_root, sb_bytenr, 512); + fs_info->sb_buffer = btrfs_find_create_tree_block(tree_root, sb_bytenr, + 4096); BUG_ON(!fs_info->sb_buffer); + fs_info->sb_buffer->fd = fp; + fs_info->sb_buffer->dev_bytenr = sb_bytenr; + ret = read_extent_from_disk(fs_info->sb_buffer); + BUG_ON(ret); + btrfs_set_buffer_uptodate(fs_info->sb_buffer); + read_extent_buffer(fs_info->sb_buffer, &fs_info->super_copy, 0, sizeof(fs_info->super_copy)); read_extent_buffer(fs_info->sb_buffer, fs_info->fsid, @@ -433,8 +484,24 @@ struct btrfs_root *open_ctree_fd(int fp, u64 sb_bytenr) tree_root->sectorsize = sectorsize; tree_root->stripesize = stripesize; + ret = btrfs_read_sys_array(tree_root); + BUG_ON(ret); + blocksize = btrfs_level_size(tree_root, + btrfs_super_chunk_root_level(disk_super)); + + __setup_root(nodesize, leafsize, sectorsize, stripesize, + chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID); + chunk_root->node = read_tree_block(chunk_root, + btrfs_super_chunk_root(disk_super), + blocksize); + + BUG_ON(!chunk_root->node); + ret = btrfs_read_chunk_tree(chunk_root); + BUG_ON(ret); + blocksize = btrfs_level_size(tree_root, btrfs_super_root_level(disk_super)); + tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super), blocksize); @@ -442,6 +509,13 @@ 
struct btrfs_root *open_ctree_fd(int fp, u64 sb_bytenr) ret = find_and_setup_root(tree_root, fs_info, BTRFS_EXTENT_TREE_OBJECTID, extent_root); BUG_ON(ret); + extent_root->track_dirty = 1; + + ret = find_and_setup_root(tree_root, fs_info, + BTRFS_DEV_TREE_OBJECTID, dev_root); + BUG_ON(ret); + dev_root->track_dirty = 1; + ret = find_and_setup_root(tree_root, fs_info, BTRFS_FS_TREE_OBJECTID, root); BUG_ON(ret); @@ -456,12 +530,17 @@ int write_ctree_super(struct btrfs_trans_handle *trans, { int ret; struct btrfs_root *tree_root = root->fs_info->tree_root; + struct btrfs_root *chunk_root = root->fs_info->chunk_root; btrfs_set_super_generation(&root->fs_info->super_copy, trans->transid); btrfs_set_super_root(&root->fs_info->super_copy, tree_root->node->start); btrfs_set_super_root_level(&root->fs_info->super_copy, btrfs_header_level(tree_root->node)); + btrfs_set_super_chunk_root(&root->fs_info->super_copy, + chunk_root->node->start); + btrfs_set_super_chunk_root_level(&root->fs_info->super_copy, + btrfs_header_level(chunk_root->node)); write_extent_buffer(root->fs_info->sb_buffer, &root->fs_info->super_copy, 0, sizeof(root->fs_info->super_copy)); @@ -471,6 +550,24 @@ int write_ctree_super(struct btrfs_trans_handle *trans, return ret; } +static int close_all_devices(struct btrfs_fs_info *fs_info) +{ + struct list_head *list; + struct list_head *next; + struct btrfs_device *device; + + list = &fs_info->devices; + while(!list_empty(list)) { + next = list->next; + list_del(next); + device = list_entry(next, struct btrfs_device, dev_list); + kfree(device->name); + close(device->fd); + kfree(device); + } + return 0; +} + int close_ctree(struct btrfs_root *root) { int ret; @@ -497,6 +594,13 @@ int close_ctree(struct btrfs_root *root) free_extent_buffer(root->commit_root); free_extent_buffer(root->fs_info->sb_buffer); + if (root->fs_info->chunk_root->node); + free_extent_buffer(root->fs_info->chunk_root->node); + + if (root->fs_info->dev_root->node); + 
free_extent_buffer(root->fs_info->dev_root->node); + + close_all_devices(root->fs_info); extent_io_tree_cleanup(&fs_info->extent_cache); extent_io_tree_cleanup(&fs_info->free_space_cache); extent_io_tree_cleanup(&fs_info->block_group_cache); @@ -507,6 +611,8 @@ int close_ctree(struct btrfs_root *root) free(fs_info->tree_root); free(fs_info->extent_root); free(fs_info->fs_root); + free(fs_info->chunk_root); + free(fs_info->dev_root); free(fs_info); return 0; diff --git a/disk-io.h b/disk-io.h index 6e56b2e..a457284 100644 --- a/disk-io.h +++ b/disk-io.h @@ -20,6 +20,7 @@ #define __DISKIO__ #define BTRFS_SUPER_INFO_OFFSET (16 * 1024) +struct btrfs_device; struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); @@ -50,4 +51,5 @@ void btrfs_csum_final(u32 crc, char *result); int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); +int btrfs_open_device(struct btrfs_device *dev); #endif diff --git a/extent-cache.c b/extent-cache.c index 4e6d1b2..b871e18 100644 --- a/extent-cache.c +++ b/extent-cache.c @@ -96,10 +96,13 @@ int insert_existing_cache_extent(struct cache_tree *tree, struct cache_extent *pe) { struct rb_node *found; + struct cache_extent *entry; found = tree_insert(&tree->root, pe->start, pe->size, &pe->rb_node); - if (found) + if (found) { + entry = rb_entry(found, struct cache_extent, rb_node); return -EEXIST; + } return 0; } diff --git a/extent-tree.c b/extent-tree.c index c907fd8..2a47827 100644 --- a/extent-tree.c +++ b/extent-tree.c @@ -25,37 +25,19 @@ #include "print-tree.h" #include "transaction.h" #include "crc32c.h" +#include "volumes.h" -#define BLOCK_GROUP_DATA EXTENT_WRITEBACK +#define BLOCK_GROUP_DATA EXTENT_WRITEBACK #define BLOCK_GROUP_METADATA EXTENT_UPTODATE +#define BLOCK_GROUP_SYSTEM EXTENT_NEW + #define BLOCK_GROUP_DIRTY EXTENT_DIRTY static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct 
btrfs_trans_handle *trans, struct btrfs_root *extent_root); -static int find_previous_extent(struct btrfs_root *root, - struct btrfs_path *path) -{ - struct btrfs_key found_key; - struct extent_buffer *leaf; - int ret; - while(1) { - if (path->slots[0] == 0) { - ret = btrfs_prev_leaf(root, path); - if (ret != 0) - return ret; - } else { - path->slots[0]--; - } - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.type == BTRFS_EXTENT_ITEM_KEY) - return 0; - } - return 1; -} static int cache_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *block_group) { @@ -91,7 +73,7 @@ static int cache_block_group(struct btrfs_root *root, ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) return ret; - ret = find_previous_extent(root, path); + ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY); if (ret < 0) return ret; if (ret == 0) { @@ -168,7 +150,8 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct block_group_cache = &info->block_group_cache; ret = find_first_extent_bit(block_group_cache, bytenr, &start, &end, - BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA); + BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA | + BLOCK_GROUP_SYSTEM); if (ret) { return NULL; } @@ -182,9 +165,24 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct return block_group; return NULL; } -static u64 noinline find_search_start(struct btrfs_root *root, + +static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) +{ + if ((bits & BLOCK_GROUP_DATA) && + (cache->flags & BTRFS_BLOCK_GROUP_DATA)) + return 1; + if ((bits & BLOCK_GROUP_METADATA) && + (cache->flags & BTRFS_BLOCK_GROUP_METADATA)) + return 1; + if ((bits & BLOCK_GROUP_SYSTEM) && + (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM)) + return 1; + return 0; +} + +static int noinline find_search_start(struct btrfs_root *root, struct btrfs_block_group_cache **cache_ret, - u64 search_start, int num, int data) + u64 *start_ret, int 
num, int data) { int ret; struct btrfs_block_group_cache *cache = *cache_ret; @@ -192,6 +190,7 @@ static u64 noinline find_search_start(struct btrfs_root *root, u64 start = 0; u64 end = 0; u64 cache_miss = 0; + u64 search_start = *start_ret; int wrapped = 0; if (!cache) { @@ -203,6 +202,9 @@ again: goto out; last = max(search_start, cache->key.objectid); + if (!block_group_bits(cache, data)) { + goto new_group; + } while(1) { ret = find_first_extent_bit(&root->fs_info->free_space_cache, @@ -220,20 +222,18 @@ again: cache_miss = start; continue; } - if (data != BTRFS_BLOCK_GROUP_MIXED && - start + num > cache->key.objectid + cache->key.offset) + if (start + num > cache->key.objectid + cache->key.offset) goto new_group; - return start; + *start_ret = start; + return 0; } out: cache = btrfs_lookup_block_group(root->fs_info, search_start); if (!cache) { - printk("Unable to find block group for %Lu\n", - search_start); + printk("Unable to find block group for %Lu\n", search_start); WARN_ON(1); - return search_start; } - return search_start; + return -ENOSPC; new_group: last = cache->key.objectid + cache->key.offset; @@ -244,7 +244,6 @@ no_cache: if (!wrapped) { wrapped = 1; last = search_start; - data = BTRFS_BLOCK_GROUP_MIXED; goto wrapped; } goto out; @@ -291,26 +290,18 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int ret; int full_search = 0; int factor = 8; - int data_swap = 0; block_group_cache = &info->block_group_cache; if (!owner) factor = 8; - if (data == BTRFS_BLOCK_GROUP_MIXED) { - bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA; - factor = 10; - } else if (data) - bit = BLOCK_GROUP_DATA; - else - bit = BLOCK_GROUP_METADATA; + bit = data; if (search_start) { struct btrfs_block_group_cache *shint; shint = btrfs_lookup_block_group(info, search_start); - if (shint && (shint->data == data || - shint->data == BTRFS_BLOCK_GROUP_MIXED)) { + if (shint && block_group_bits(shint, data)) { used = btrfs_block_group_used(&shint->item); 
if (used + shint->pinned < div_factor(shint->key.offset, factor)) { @@ -318,8 +309,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, } } } - if (hint && (hint->data == data || - hint->data == BTRFS_BLOCK_GROUP_MIXED)) { + if (hint && block_group_bits(hint, data)) { used = btrfs_block_group_used(&hint->item); if (used + hint->pinned < div_factor(hint->key.offset, factor)) { @@ -365,12 +355,6 @@ again: full_search = 1; goto again; } - if (!data_swap) { - data_swap = 1; - bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA; - last = search_start; - goto again; - } found: return found_group; } @@ -985,7 +969,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc, - int mark_free, int data) + int mark_free) { struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; @@ -1010,41 +994,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, old_val = btrfs_block_group_used(&cache->item); num_bytes = min(total, cache->key.offset - byte_in_group); if (alloc) { - if (cache->data != data && - old_val < (cache->key.offset >> 1)) { - int bit_to_clear; - int bit_to_set; - cache->data = data; - if (data) { - bit_to_clear = BLOCK_GROUP_METADATA; - bit_to_set = BLOCK_GROUP_DATA; - cache->item.flags &= - ~BTRFS_BLOCK_GROUP_MIXED; - cache->item.flags |= - BTRFS_BLOCK_GROUP_DATA; - } else { - bit_to_clear = BLOCK_GROUP_DATA; - bit_to_set = BLOCK_GROUP_METADATA; - cache->item.flags &= - ~BTRFS_BLOCK_GROUP_MIXED; - cache->item.flags &= - ~BTRFS_BLOCK_GROUP_DATA; - } - clear_extent_bits(&info->block_group_cache, - start, end, bit_to_clear, - GFP_NOFS); - set_extent_bits(&info->block_group_cache, - start, end, bit_to_set, - GFP_NOFS); - } else if (cache->data != data && - cache->data != BTRFS_BLOCK_GROUP_MIXED) { - cache->data = BTRFS_BLOCK_GROUP_MIXED; - 
set_extent_bits(&info->block_group_cache, - start, end, - BLOCK_GROUP_DATA | - BLOCK_GROUP_METADATA, - GFP_NOFS); - } old_val += num_bytes; } else { old_val -= num_bytes; @@ -1341,7 +1290,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root ops->free_extent(root, bytenr, num_bytes); ret = update_block_group(trans, root, bytenr, num_bytes, 0, - mark_free, 0); + mark_free); BUG_ON(ret); } btrfs_free_path(path); @@ -1434,36 +1383,21 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, u64 exclude_start, u64 exclude_nr, int data) { - struct btrfs_path *path; - struct btrfs_key key; - u64 hole_size = 0; - u64 aligned; int ret; - int slot = 0; - u64 last_byte = 0; u64 orig_search_start = search_start; - int start_found; - struct extent_buffer *l; struct btrfs_root * root = orig_root->fs_info->extent_root; struct btrfs_fs_info *info = root->fs_info; u64 total_needed = num_bytes; - int level; struct btrfs_block_group_cache *block_group; int full_scan = 0; int wrapped = 0; - u64 cached_start; WARN_ON(num_bytes < root->sectorsize); btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); - level = btrfs_header_level(root->node); - - if (num_bytes >= 32 * 1024 * 1024 && hint_byte) { - data = BTRFS_BLOCK_GROUP_MIXED; - } - if (search_end == (u64)-1) search_end = btrfs_super_total_bytes(&info->super_copy); + if (hint_byte) { block_group = btrfs_lookup_block_group(info, hint_byte); if (!block_group) @@ -1477,7 +1411,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, } total_needed += empty_size; - path = btrfs_alloc_path(); + check_failed: if (!block_group) { block_group = btrfs_lookup_block_group(info, search_start); @@ -1485,135 +1419,49 @@ check_failed: block_group = btrfs_lookup_block_group(info, orig_search_start); } - search_start = find_search_start(root, &block_group, search_start, - total_needed, data); - search_start = stripe_align(root, search_start); - cached_start = search_start; - 
btrfs_init_path(path); - ins->objectid = search_start; - ins->offset = 0; - start_found = 0; - path->reada = 2; - - ret = btrfs_search_slot(trans, root, ins, path, 0, 0); - if (ret < 0) - goto error; - ret = find_previous_extent(root, path); - if (ret < 0) + ret = find_search_start(root, &block_group, &search_start, + total_needed, data); + if (ret) goto error; - l = path->nodes[0]; - btrfs_item_key_to_cpu(l, &key, path->slots[0]); - while (1) { - l = path->nodes[0]; - slot = path->slots[0]; - if (slot >= btrfs_header_nritems(l)) { - ret = btrfs_next_leaf(root, path); - if (ret == 0) - continue; - if (ret < 0) - goto error; - - search_start = max(search_start, - block_group->key.objectid); - if (!start_found) { - aligned = stripe_align(root, search_start); - ins->objectid = aligned; - if (aligned >= search_end) { - ret = -ENOSPC; - goto error; - } - ins->offset = search_end - aligned; - start_found = 1; - goto check_pending; - } - ins->objectid = stripe_align(root, - last_byte > search_start ? 
- last_byte : search_start); - if (search_end <= ins->objectid) { - ret = -ENOSPC; - goto error; - } - ins->offset = search_end - ins->objectid; - BUG_ON(ins->objectid >= search_end); - goto check_pending; - } - btrfs_item_key_to_cpu(l, &key, slot); - - if (key.objectid >= search_start && key.objectid > last_byte && - start_found) { - if (last_byte < search_start) - last_byte = search_start; - aligned = stripe_align(root, last_byte); - hole_size = key.objectid - aligned; - if (key.objectid > aligned && hole_size >= num_bytes) { - ins->objectid = aligned; - ins->offset = hole_size; - goto check_pending; - } - } - if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) { - if (!start_found && btrfs_key_type(&key) == - BTRFS_BLOCK_GROUP_ITEM_KEY) { - last_byte = key.objectid; - start_found = 1; - } - goto next; - } - - start_found = 1; - last_byte = key.objectid + key.offset; - - if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED && - last_byte >= block_group->key.objectid + - block_group->key.offset) { - btrfs_release_path(root, path); - search_start = block_group->key.objectid + - block_group->key.offset; - goto new_group; - } -next: - path->slots[0]++; - cond_resched(); - } -check_pending: - /* we have to make sure we didn't find an extent that has already - * been allocated by the map tree or the original allocation - */ - btrfs_release_path(root, path); - BUG_ON(ins->objectid < search_start); + search_start = stripe_align(root, search_start); + ins->objectid = search_start; + ins->offset = num_bytes; if (ins->objectid + num_bytes >= search_end) goto enospc; - if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED && - ins->objectid + num_bytes > block_group-> - key.objectid + block_group->key.offset) { + + if (ins->objectid + num_bytes > + block_group->key.objectid + block_group->key.offset) { search_start = block_group->key.objectid + block_group->key.offset; goto new_group; } + if (test_range_bit(&info->extent_ins, ins->objectid, ins->objectid + num_bytes -1, 
EXTENT_LOCKED, 0)) { search_start = ins->objectid + num_bytes; goto new_group; } + if (test_range_bit(&info->pinned_extents, ins->objectid, ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) { search_start = ins->objectid + num_bytes; goto new_group; } + if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start && ins->objectid < exclude_start + exclude_nr)) { search_start = exclude_start + exclude_nr; goto new_group; } - if (!data) { + + if (!(data & BLOCK_GROUP_DATA)) { block_group = btrfs_lookup_block_group(info, ins->objectid); if (block_group) trans->block_group = block_group; } ins->offset = num_bytes; - btrfs_free_path(path); return 0; new_group: @@ -1628,7 +1476,6 @@ enospc: if (!full_scan) total_needed -= empty_size; full_scan = 1; - data = BTRFS_BLOCK_GROUP_MIXED; } else wrapped = 1; } @@ -1639,8 +1486,6 @@ enospc: goto check_failed; error: - btrfs_release_path(root, path); - btrfs_free_path(path); return ret; } /* @@ -1670,6 +1515,13 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_extent_ref *ref; struct btrfs_key keys[2]; + if (data) + data = BLOCK_GROUP_DATA; + else if (info->force_system_allocs || root == root->fs_info->chunk_root) + data = BLOCK_GROUP_SYSTEM; + else + data = BLOCK_GROUP_METADATA; + WARN_ON(num_bytes < root->sectorsize); if (ops && ops->alloc_extent) { ret = ops->alloc_extent(root, num_bytes, hint_byte, ins); @@ -1699,7 +1551,6 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, set_extent_bits(&root->fs_info->extent_ins, ins->objectid, ins->objectid + ins->offset - 1, EXTENT_LOCKED, GFP_NOFS); - WARN_ON(data == 1); goto update_block; } @@ -1749,8 +1600,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, } update_block: - ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0, - data); + ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0); if (ret) { printk("update block group failed for %Lu %Lu\n", ins->objectid, ins->offset); @@ -1812,6 +1662,7 @@ 
struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, btrfs_free_extent(trans, root, ins.objectid, blocksize, root->root_key.objectid, ref_generation, 0, 0, 0); + BUG_ON(1); return ERR_PTR(-ENOMEM); } btrfs_set_buffer_uptodate(buf); @@ -2179,11 +2030,44 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) return 0; } +int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_key *key) +{ + int ret; + struct btrfs_key found_key; + struct extent_buffer *leaf; + int slot; + + ret = btrfs_search_slot(NULL, root, key, path, 0, 0); + if (ret < 0) + return ret; + while(1) { + slot = path->slots[0]; + leaf = path->nodes[0]; + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret == 0) + continue; + if (ret < 0) + goto error; + break; + } + btrfs_item_key_to_cpu(leaf, &found_key, slot); + + if (found_key.objectid >= key->objectid && + found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) + return 0; + path->slots[0]++; + } + ret = -ENOENT; +error: + return ret; +} + int btrfs_read_block_groups(struct btrfs_root *root) { struct btrfs_path *path; int ret; - int err = 0; int bit; struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; @@ -2196,25 +2080,26 @@ int btrfs_read_block_groups(struct btrfs_root *root) root = info->extent_root; key.objectid = 0; - key.offset = BTRFS_BLOCK_GROUP_SIZE; + key.offset = 0; btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); - path = btrfs_alloc_path(); if (!path) return -ENOMEM; while(1) { - ret = btrfs_search_slot(NULL, info->extent_root, - &key, path, 0, 0); + ret = find_first_block_group(root, path, &key); + if (ret > 0) { + ret = 0; + goto error; + } if (ret != 0) { - err = ret; - break; + goto error; } leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); cache = kmalloc(sizeof(*cache), GFP_NOFS); if (!cache) { - err = -1; + ret = -ENOMEM; break; } @@ -2226,16 +2111,14 @@ int 
btrfs_read_block_groups(struct btrfs_root *root) cache->pinned = 0; key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); - - if (cache->item.flags & BTRFS_BLOCK_GROUP_MIXED) { - bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA; - cache->data = BTRFS_BLOCK_GROUP_MIXED; - } else if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { + cache->flags = btrfs_block_group_flags(&cache->item); + bit = 0; + if (cache->flags & BTRFS_BLOCK_GROUP_DATA) { bit = BLOCK_GROUP_DATA; - cache->data = BTRFS_BLOCK_GROUP_DATA; - } else { + } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) { + bit = BLOCK_GROUP_SYSTEM; + } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { bit = BLOCK_GROUP_METADATA; - cache->data = 0; } /* use EXTENT_LOCKED to prevent merging */ @@ -2249,9 +2132,10 @@ int btrfs_read_block_groups(struct btrfs_root *root) btrfs_super_total_bytes(&info->super_copy)) break; } - + ret = 0; +error: btrfs_free_path(path); - return 0; + return ret; } static int btrfs_insert_block_group(struct btrfs_trans_handle *trans, @@ -2274,14 +2158,11 @@ static int btrfs_insert_block_group(struct btrfs_trans_handle *trans, return 0; } -int btrfs_make_block_groups(struct btrfs_trans_handle *trans, - struct btrfs_root *root) +int btrfs_make_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytes_used, + u64 type, u64 chunk_tree, u64 chunk_objectid, + u64 size) { - u64 group_size; - u64 bytes_used; - u64 total_bytes; - u64 cur_start; - u64 nr = 0; int ret; int bit; struct btrfs_root *extent_root; @@ -2290,46 +2171,34 @@ int btrfs_make_block_groups(struct btrfs_trans_handle *trans, extent_root = root->fs_info->extent_root; block_group_cache = &root->fs_info->block_group_cache; - group_size = BTRFS_BLOCK_GROUP_SIZE; - bytes_used = btrfs_super_bytes_used(&root->fs_info->super_copy); - total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); - - cur_start = 0; - while (cur_start < total_bytes) { - cache = malloc(sizeof(*cache)); - 
BUG_ON(!cache); - cache->key.objectid = cur_start; - cache->key.offset = group_size; - btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); - memset(&cache->item, 0, sizeof(cache->item)); - if (nr == 0) - btrfs_set_block_group_used(&cache->item, bytes_used); - if (nr++ % 3) { - bit = BLOCK_GROUP_DATA; - cache->data = 1; - cache->item.flags |= BTRFS_BLOCK_GROUP_DATA; - } else { - bit = BLOCK_GROUP_METADATA; - cache->data = 0; - } - set_extent_bits(block_group_cache, cur_start, - cur_start + group_size - 1, - bit | EXTENT_LOCKED, GFP_NOFS); - set_state_private(block_group_cache, cur_start, - (unsigned long)cache); - cur_start += group_size; - } - /* then insert all the items */ - cur_start = 0; - while(cur_start < total_bytes) { - cache = btrfs_lookup_block_group(root->fs_info, cur_start); - BUG_ON(!cache); - ret = btrfs_insert_block_group(trans, root, &cache->key, - &cache->item); - BUG_ON(ret); - cur_start += group_size; + cache = malloc(sizeof(*cache)); + BUG_ON(!cache); + cache->key.objectid = chunk_objectid; + cache->key.offset = size; + cache->cached = 0; + btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); + memset(&cache->item, 0, sizeof(cache->item)); + btrfs_set_block_group_used(&cache->item, bytes_used); + btrfs_set_block_group_chunk_tree(&cache->item, chunk_tree); + btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); + cache->flags = type; + btrfs_set_block_group_flags(&cache->item, type); + + if (type & BTRFS_BLOCK_GROUP_DATA) { + bit = BLOCK_GROUP_DATA; + } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { + bit = BLOCK_GROUP_SYSTEM; + } else if (type & BTRFS_BLOCK_GROUP_METADATA) { + bit = BLOCK_GROUP_METADATA; } + set_extent_bits(block_group_cache, chunk_objectid, + chunk_objectid + size - 1, + bit | EXTENT_LOCKED, GFP_NOFS); + set_state_private(block_group_cache, chunk_objectid, + (unsigned long)cache); + ret = btrfs_insert_block_group(trans, root, &cache->key, &cache->item); + BUG_ON(ret); return 0; } @@ -2343,8 +2212,8 
@@ u64 btrfs_hash_extent_ref(u64 root_objectid, u64 ref_generation, int btrfs_update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc, - int mark_free, int data) + int mark_free) { return update_block_group(trans, root, bytenr, num_bytes, - alloc, mark_free, data); + alloc, mark_free); } diff --git a/extent_io.c b/extent_io.c index 62f3568..b663275 100644 --- a/extent_io.c +++ b/extent_io.c @@ -552,8 +552,10 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, int ret; eb = malloc(sizeof(struct extent_buffer) + blocksize); - if (!eb) + if (!eb) { + BUG(); return NULL; + } eb->start = bytenr; eb->len = blocksize; diff --git a/mkfs.c b/mkfs.c index c3a84c0..d5cefac 100644 --- a/mkfs.c +++ b/mkfs.c @@ -33,6 +33,7 @@ #include "kerncompat.h" #include "ctree.h" #include "disk-io.h" +#include "volumes.h" #include "transaction.h" #include "utils.h" @@ -108,6 +109,9 @@ static int make_root_dir(int fd) { struct btrfs_root *root; struct btrfs_trans_handle *trans; struct btrfs_key location; + u64 bytes_used; + u64 chunk_start = 0; + u64 chunk_size = 0; int ret; root = open_ctree_fd(fd, 0); @@ -117,7 +121,40 @@ static int make_root_dir(int fd) { return -1; } trans = btrfs_start_transaction(root, 1); - ret = btrfs_make_block_groups(trans, root); + bytes_used = btrfs_super_bytes_used(&root->fs_info->super_copy); + + root->fs_info->force_system_allocs = 1; + ret = btrfs_make_block_group(trans, root, bytes_used, + BTRFS_BLOCK_GROUP_SYSTEM, + BTRFS_CHUNK_TREE_OBJECTID, + 0, BTRFS_MKFS_SYSTEM_GROUP_SIZE); + BUG_ON(ret); + ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root, + &chunk_start, &chunk_size, + BTRFS_BLOCK_GROUP_METADATA); + BUG_ON(ret); + ret = btrfs_make_block_group(trans, root, 0, + BTRFS_BLOCK_GROUP_METADATA, + BTRFS_CHUNK_TREE_OBJECTID, + chunk_start, chunk_size); + BUG_ON(ret); + + root->fs_info->force_system_allocs = 0; + btrfs_commit_transaction(trans, root); + trans = 
btrfs_start_transaction(root, 1); + BUG_ON(!trans); + + ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root, + &chunk_start, &chunk_size, + BTRFS_BLOCK_GROUP_DATA); + BUG_ON(ret); + ret = btrfs_make_block_group(trans, root, 0, + BTRFS_BLOCK_GROUP_DATA, + BTRFS_CHUNK_TREE_OBJECTID, + chunk_start, chunk_size); + BUG_ON(ret); + + // ret = btrfs_make_block_group(trans, root, 0, 1); ret = btrfs_make_root_dir(trans, root->fs_info->tree_root, BTRFS_ROOT_TREE_DIR_OBJECTID); if (ret) @@ -179,7 +216,7 @@ int main(int ac, char **av) u32 sectorsize = 4096; u32 nodesize = 16 * 1024; u32 stripesize = 4096; - u64 blocks[4]; + u64 blocks[6]; int zero_end = 0; while(1) { @@ -261,10 +298,10 @@ int main(int ac, char **av) } } - for (i = 0; i < 4; i++) + for (i = 0; i < 6; i++) blocks[i] = BTRFS_SUPER_INFO_OFFSET + leafsize * i; - ret = make_btrfs(fd, blocks, block_count, nodesize, leafsize, + ret = make_btrfs(fd, file, blocks, block_count, nodesize, leafsize, sectorsize, stripesize); if (ret) { fprintf(stderr, "error during mkfs %d\n", ret); diff --git a/print-tree.c b/print-tree.c index db46339..ec99b8c 100644 --- a/print-tree.c +++ b/print-tree.c @@ -75,6 +75,40 @@ static int print_inode_ref_item(struct extent_buffer *eb, struct btrfs_item *ite return 0; } +static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk) +{ + int num_stripes = btrfs_chunk_num_stripes(eb, chunk); + int i; + printf("\t\tchunk owner %llu type %llu num_stripes %d\n", + (unsigned long long)btrfs_chunk_owner(eb, chunk), + (unsigned long long)btrfs_chunk_type(eb, chunk), + num_stripes); + for (i = 0 ; i < num_stripes ; i++) { + printf("\t\t\tstripe %d devid %llu offset %llu\n", i, + (unsigned long long)btrfs_stripe_devid_nr(eb, chunk, i), + (unsigned long long)btrfs_stripe_offset_nr(eb, chunk, i)); + } +} +static void print_dev_item(struct extent_buffer *eb, + struct btrfs_dev_item *dev_item) +{ + char *name; + int name_len; + + name_len = btrfs_device_name_len(eb, dev_item); + name = 
kmalloc(name_len, GFP_NOFS); + if (name) { + read_extent_buffer(eb, name, + (unsigned long)btrfs_device_name(dev_item), + name_len); + } + printf("\t\tdev item name %.*s devid %llu " + "total_bytes %llu bytes used %Lu\n", name_len, name, + (unsigned long long)btrfs_device_id(eb, dev_item), + (unsigned long long)btrfs_device_total_bytes(eb, dev_item), + (unsigned long long)btrfs_device_bytes_used(eb, dev_item)); + kfree(name); +} void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) { int i; @@ -89,6 +123,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) struct btrfs_block_group_item *bi; struct btrfs_extent_ref *ref; struct btrfs_inode_ref *iref; + struct btrfs_dev_extent *dev_extent; struct btrfs_disk_key disk_key; struct btrfs_root_item root_item; struct btrfs_block_group_item bg_item; @@ -190,9 +225,23 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) struct btrfs_block_group_item); read_extent_buffer(l, &bg_item, (unsigned long)bi, sizeof(bg_item)); - printf("\t\tblock group used %llu flags %x\n", + printf("\t\tblock group used %llu flags %llx\n", (unsigned long long)btrfs_block_group_used(&bg_item), - bg_item.flags); + (unsigned long long)btrfs_block_group_flags(&bg_item)); + break; + case BTRFS_CHUNK_ITEM_KEY: + print_chunk(l, btrfs_item_ptr(l, i, struct btrfs_chunk)); + break; + case BTRFS_DEV_ITEM_KEY: + print_dev_item(l, btrfs_item_ptr(l, i, + struct btrfs_dev_item)); + break; + case BTRFS_DEV_EXTENT_KEY: + dev_extent = btrfs_item_ptr(l, i, + struct btrfs_dev_extent); + printf("\t\tdev extent owner %llu length %llu\n", + (unsigned long long)btrfs_dev_extent_owner(l, dev_extent), + (unsigned long long)btrfs_dev_extent_length(l, dev_extent)); break; case BTRFS_STRING_ITEM_KEY: /* dirty, but it's simple */ diff --git a/utils.c b/utils.c index 3a618ac..f04e9c5 100644 --- a/utils.c +++ b/utils.c @@ -32,13 +32,16 @@ #include "transaction.h" #include "crc32c.h" #include "utils.h" -static u64 
reference_root_table[4] = { +static u64 reference_root_table[6] = { [1] = BTRFS_ROOT_TREE_OBJECTID, [2] = BTRFS_EXTENT_TREE_OBJECTID, - [3] = BTRFS_FS_TREE_OBJECTID, + [3] = BTRFS_CHUNK_TREE_OBJECTID, + [4] = BTRFS_DEV_TREE_OBJECTID, + [5] = BTRFS_FS_TREE_OBJECTID, }; -int make_btrfs(int fd, u64 blocks[4], u64 num_bytes, u32 nodesize, +int make_btrfs(int fd, char *device_name, + u64 blocks[6], u64 num_bytes, u32 nodesize, u32 leafsize, u32 sectorsize, u32 stripesize) { struct btrfs_super_block super; @@ -48,6 +51,10 @@ int make_btrfs(int fd, u64 blocks[4], u64 num_bytes, u32 nodesize, struct btrfs_extent_ref *extent_ref; struct btrfs_extent_item *extent_item; struct btrfs_inode_item *inode_item; + struct btrfs_chunk *chunk; + struct btrfs_dev_item *dev_item; + struct btrfs_dev_extent *dev_extent; + u8 *ptr; int i; int ret; u32 itemoff; @@ -56,6 +63,8 @@ int make_btrfs(int fd, u64 blocks[4], u64 num_bytes, u32 nodesize, u64 first_free; u64 ref_gen; u64 ref_root; + u32 array_size; + u32 item_size; first_free = BTRFS_SUPER_INFO_OFFSET + sectorsize * 2 - 1; first_free &= ~((u64)sectorsize - 1); @@ -66,27 +75,24 @@ int make_btrfs(int fd, u64 blocks[4], u64 num_bytes, u32 nodesize, strncpy((char *)&super.magic, BTRFS_MAGIC, sizeof(super.magic)); btrfs_set_super_generation(&super, 1); btrfs_set_super_root(&super, blocks[1]); + btrfs_set_super_chunk_root(&super, blocks[3]); btrfs_set_super_total_bytes(&super, num_bytes); - btrfs_set_super_bytes_used(&super, first_free + 3 * leafsize); + btrfs_set_super_bytes_used(&super, first_free + 5 * leafsize); btrfs_set_super_root_dir(&super, 0); btrfs_set_super_sectorsize(&super, sectorsize); btrfs_set_super_leafsize(&super, leafsize); btrfs_set_super_nodesize(&super, nodesize); btrfs_set_super_stripesize(&super, stripesize); btrfs_set_super_root_level(&super, 0); + btrfs_set_super_chunk_root_level(&super, 0); + btrfs_set_super_sys_array_size(&super, 0); buf = malloc(sizeof(*buf) + max(sectorsize, leafsize)); - BUG_ON(sizeof(super) > 
sectorsize); - memset(buf->data, 0, sectorsize); - memcpy(buf->data, &super, sizeof(super)); - ret = pwrite(fd, buf->data, sectorsize, blocks[0]); - BUG_ON(ret != sectorsize); - /* create the tree of root objects */ memset(buf->data, 0, leafsize); btrfs_set_header_bytenr(buf, blocks[1]); - btrfs_set_header_nritems(buf, 2); + btrfs_set_header_nritems(buf, 3); btrfs_set_header_generation(buf, 1); btrfs_set_header_owner(buf, BTRFS_ROOT_TREE_OBJECTID); write_extent_buffer(buf, super.fsid, (unsigned long) @@ -117,13 +123,23 @@ int make_btrfs(int fd, u64 blocks[4], u64 num_bytes, u32 nodesize, sizeof(root_item)); itemoff = itemoff - sizeof(root_item); - btrfs_set_root_bytenr(&root_item, blocks[3]); + btrfs_set_root_bytenr(&root_item, blocks[5]); btrfs_set_disk_key_objectid(&disk_key, BTRFS_FS_TREE_OBJECTID); btrfs_set_item_key(buf, &disk_key, 1); btrfs_set_item_offset(buf, btrfs_item_nr(buf, 1), itemoff); btrfs_set_item_size(buf, btrfs_item_nr(buf, 1), sizeof(root_item)); write_extent_buffer(buf, &root_item, btrfs_item_ptr_offset(buf, 1), sizeof(root_item)); + + itemoff = itemoff - sizeof(root_item); + btrfs_set_root_bytenr(&root_item, blocks[4]); + btrfs_set_disk_key_objectid(&disk_key, BTRFS_DEV_TREE_OBJECTID); + btrfs_set_item_key(buf, &disk_key, 2); + btrfs_set_item_offset(buf, btrfs_item_nr(buf, 2), itemoff); + btrfs_set_item_size(buf, btrfs_item_nr(buf, 2), sizeof(root_item)); + write_extent_buffer(buf, &root_item, btrfs_item_ptr_offset(buf, 2), + sizeof(root_item)); + ret = pwrite(fd, buf->data, leafsize, blocks[1]); BUG_ON(ret != leafsize); @@ -140,7 +156,7 @@ int make_btrfs(int fd, u64 blocks[4], u64 num_bytes, u32 nodesize, extent_item = btrfs_item_ptr(buf, nritems, struct btrfs_extent_item); btrfs_set_extent_refs(buf, extent_item, 1); nritems++; - for (i = 1; i < 4; i++) { + for (i = 1; i < 6; i++) { BUG_ON(blocks[i] < first_free); BUG_ON(blocks[i] < blocks[i - 1]); @@ -190,13 +206,127 @@ int make_btrfs(int fd, u64 blocks[4], u64 num_bytes, u32 nodesize, ret = 
pwrite(fd, buf->data, leafsize, blocks[2]); BUG_ON(ret != leafsize); - /* finally create the FS root */ + /* create the chunk tree */ + nritems = 0; + item_size = btrfs_chunk_item_size(1); + itemoff = __BTRFS_LEAF_DATA_SIZE(leafsize) - item_size; + + /* first we have chunk 0 */ + btrfs_set_disk_key_objectid(&disk_key, 0); + btrfs_set_disk_key_offset(&disk_key, BTRFS_MKFS_SYSTEM_GROUP_SIZE); + btrfs_set_disk_key_type(&disk_key, BTRFS_CHUNK_ITEM_KEY); + btrfs_set_item_key(buf, &disk_key, nritems); + btrfs_set_item_offset(buf, btrfs_item_nr(buf, nritems), itemoff); + btrfs_set_item_size(buf, btrfs_item_nr(buf, nritems), item_size); + + chunk = btrfs_item_ptr(buf, nritems, struct btrfs_chunk); + btrfs_set_chunk_owner(buf, chunk, BTRFS_EXTENT_TREE_OBJECTID); + btrfs_set_chunk_stripe_len(buf, chunk, 64 * 1024); + btrfs_set_chunk_type(buf, chunk, BTRFS_BLOCK_GROUP_SYSTEM); + btrfs_set_chunk_io_align(buf, chunk, sectorsize); + btrfs_set_chunk_io_width(buf, chunk, sectorsize); + btrfs_set_chunk_sector_size(buf, chunk, sectorsize); + btrfs_set_chunk_num_stripes(buf, chunk, 1); + btrfs_set_stripe_devid_nr(buf, chunk, 0, 1); + btrfs_set_stripe_offset_nr(buf, chunk, 0, 0); + + /* copy the key for the chunk to the system array */ + ptr = super.sys_chunk_array; + array_size = sizeof(disk_key); + + memcpy(ptr, &disk_key, sizeof(disk_key)); + ptr += sizeof(disk_key); + + /* copy the chunk to the system array */ + read_extent_buffer(buf, ptr, (unsigned long)chunk, item_size); + array_size += item_size; + ptr += item_size; + + /* then device 1 (there is no device 0) */ + nritems++; + item_size = sizeof(*dev_item) + strlen(device_name); + itemoff = itemoff - item_size; + btrfs_set_disk_key_objectid(&disk_key, BTRFS_DEV_ITEMS_OBJECTID); + btrfs_set_disk_key_offset(&disk_key, 1); + btrfs_set_disk_key_type(&disk_key, BTRFS_DEV_ITEM_KEY); + btrfs_set_item_key(buf, &disk_key, nritems); + btrfs_set_item_offset(buf, btrfs_item_nr(buf, nritems), itemoff); + btrfs_set_item_size(buf, 
btrfs_item_nr(buf, nritems), item_size); + + dev_item = btrfs_item_ptr(buf, nritems, struct btrfs_dev_item); + btrfs_set_device_id(buf, dev_item, 1); + btrfs_set_device_total_bytes(buf, dev_item, num_bytes); + btrfs_set_device_bytes_used(buf, dev_item, + BTRFS_MKFS_SYSTEM_GROUP_SIZE); + btrfs_set_device_io_align(buf, dev_item, sectorsize); + btrfs_set_device_io_width(buf, dev_item, sectorsize); + btrfs_set_device_sector_size(buf, dev_item, sectorsize); + btrfs_set_device_rdev(buf, dev_item, 0); + btrfs_set_device_type(buf, dev_item, 0); + btrfs_set_device_partition(buf, dev_item, 0); + btrfs_set_device_name_len(buf, dev_item, strlen(device_name)); + nritems++; + + memset_extent_buffer(buf, 0, (unsigned long)btrfs_device_uuid(dev_item), + BTRFS_DEV_UUID_SIZE); + write_extent_buffer(buf, device_name, + (unsigned long)btrfs_device_name(dev_item), + strlen(device_name)); + + /* copy the device item and key into the system array */ + memcpy(ptr, &disk_key, sizeof(disk_key)); + ptr += sizeof(disk_key); + array_size += sizeof(disk_key); + + read_extent_buffer(buf, ptr, (unsigned long)dev_item, item_size); + array_size += item_size; + ptr += item_size; + + btrfs_set_super_sys_array_size(&super, array_size); + btrfs_set_header_bytenr(buf, blocks[3]); + btrfs_set_header_owner(buf, BTRFS_CHUNK_TREE_OBJECTID); + btrfs_set_header_nritems(buf, nritems); + ret = pwrite(fd, buf->data, leafsize, blocks[3]); + + /* create the device tree */ + nritems = 0; + itemoff = __BTRFS_LEAF_DATA_SIZE(leafsize) - + sizeof(struct btrfs_dev_extent); + + btrfs_set_disk_key_objectid(&disk_key, 1); + btrfs_set_disk_key_offset(&disk_key, 0); + btrfs_set_disk_key_type(&disk_key, BTRFS_DEV_EXTENT_KEY); + btrfs_set_item_key(buf, &disk_key, nritems); + btrfs_set_item_offset(buf, btrfs_item_nr(buf, nritems), itemoff); + btrfs_set_item_size(buf, btrfs_item_nr(buf, nritems), + sizeof(struct btrfs_dev_extent)); + dev_extent = btrfs_item_ptr(buf, nritems, struct btrfs_dev_extent); + 
btrfs_set_dev_extent_owner(buf, dev_extent, 0); + btrfs_set_dev_extent_length(buf, dev_extent, + BTRFS_MKFS_SYSTEM_GROUP_SIZE); + nritems++; + + btrfs_set_header_bytenr(buf, blocks[4]); + btrfs_set_header_owner(buf, BTRFS_DEV_TREE_OBJECTID); + btrfs_set_header_nritems(buf, nritems); + ret = pwrite(fd, buf->data, leafsize, blocks[4]); + + /* finally create the FS root */ + btrfs_set_header_bytenr(buf, blocks[5]); btrfs_set_header_owner(buf, BTRFS_FS_TREE_OBJECTID); btrfs_set_header_nritems(buf, 0); - ret = pwrite(fd, buf->data, leafsize, blocks[3]); + ret = pwrite(fd, buf->data, leafsize, blocks[5]); BUG_ON(ret != leafsize); + /* and write out the super block */ + BUG_ON(sizeof(super) > sectorsize); + memset(buf->data, 0, sectorsize); + memcpy(buf->data, &super, sizeof(super)); + ret = pwrite(fd, buf->data, sectorsize, blocks[0]); + BUG_ON(ret != sectorsize); + + free(buf); return 0; } diff --git a/utils.h b/utils.h index 48352f2..9dc8cf8 100644 --- a/utils.h +++ b/utils.h @@ -18,8 +18,12 @@ #ifndef __UTILS__ #define __UTILS__ -int make_btrfs(int fd, u64 new_blocks[4], u64 num_bytes, u32 nodesize, - u32 leafsize, u32 sectorsize, u32 stripesize); + +#define BTRFS_MKFS_SYSTEM_GROUP_SIZE (4 * 1024 * 1024) + +int make_btrfs(int fd, char *device_name, + u64 blocks[6], u64 num_bytes, u32 nodesize, + u32 leafsize, u32 sectorsize, u32 stripesize); int btrfs_make_root_dir(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid); #endif diff --git a/volumes.c b/volumes.c new file mode 100644 index 0000000..2fb5a20 --- /dev/null +++ b/volumes.c @@ -0,0 +1,771 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ +#include "ctree.h" +#include "disk-io.h" +#include "transaction.h" +#include "print-tree.h" +#include "volumes.h" + +struct map_lookup { + struct cache_extent ce; + struct btrfs_device *dev; + u64 physical; +}; + +/* + * this uses a pretty simple search, the expectation is that it is + * called very infrequently and that a given device has a small number + * of extents + */ +static int find_free_dev_extent(struct btrfs_trans_handle *trans, + struct btrfs_device *device, + struct btrfs_path *path, + u64 num_bytes, u64 *start) +{ + struct btrfs_key key; + struct btrfs_root *root = device->dev_root; + struct btrfs_dev_extent *dev_extent = NULL; + u64 hole_size = 0; + u64 last_byte = 0; + u64 search_start = 0; + u64 search_end = device->total_bytes; + int ret; + int slot = 0; + int start_found; + struct extent_buffer *l; + + start_found = 0; + path->reada = 2; + + /* FIXME use last free of some kind */ + + key.objectid = device->devid; + key.offset = search_start; + key.type = BTRFS_DEV_EXTENT_KEY; + ret = btrfs_search_slot(trans, root, &key, path, 0, 0); + if (ret < 0) + goto error; + ret = btrfs_previous_item(root, path, 0, key.type); + if (ret < 0) + goto error; + l = path->nodes[0]; + btrfs_item_key_to_cpu(l, &key, path->slots[0]); + while (1) { + l = path->nodes[0]; + slot = path->slots[0]; + if (slot >= btrfs_header_nritems(l)) { + ret = btrfs_next_leaf(root, path); + if (ret == 0) + continue; + if (ret < 0) + goto error; +no_more_items: + if (!start_found) { + if (search_start >= 
search_end) { + ret = -ENOSPC; + goto error; + } + *start = search_start; + start_found = 1; + goto check_pending; + } + *start = last_byte > search_start ? + last_byte : search_start; + if (search_end <= *start) { + ret = -ENOSPC; + goto error; + } + goto check_pending; + } + btrfs_item_key_to_cpu(l, &key, slot); + + if (key.objectid < device->devid) + goto next; + + if (key.objectid > device->devid) + goto no_more_items; + + if (key.offset >= search_start && key.offset > last_byte && + start_found) { + if (last_byte < search_start) + last_byte = search_start; + hole_size = key.offset - last_byte; + if (key.offset > last_byte && + hole_size >= num_bytes) { + *start = last_byte; + goto check_pending; + } + } + if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) { + goto next; + } + + start_found = 1; + dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); + last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent); +next: + path->slots[0]++; + cond_resched(); + } +check_pending: + /* we have to make sure we didn't find an extent that has already + * been allocated by the map tree or the original allocation + */ + btrfs_release_path(root, path); + BUG_ON(*start < search_start); + + if (*start + num_bytes >= search_end) { + ret = -ENOSPC; + goto error; + } + /* check for pending inserts here */ + return 0; + +error: + btrfs_release_path(root, path); + return ret; +} + +int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, + struct btrfs_device *device, + u64 owner, u64 num_bytes, u64 *start) +{ + int ret; + struct btrfs_path *path; + struct btrfs_root *root = device->dev_root; + struct btrfs_dev_extent *extent; + struct extent_buffer *leaf; + struct btrfs_key key; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = find_free_dev_extent(trans, device, path, num_bytes, start); + if (ret) + goto err; + + key.objectid = device->devid; + key.offset = *start; + key.type = BTRFS_DEV_EXTENT_KEY; + ret = btrfs_insert_empty_item(trans, 
root, path, &key, + sizeof(*extent)); + BUG_ON(ret); + + leaf = path->nodes[0]; + extent = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_dev_extent); + btrfs_set_dev_extent_owner(leaf, extent, owner); + btrfs_set_dev_extent_length(leaf, extent, num_bytes); + btrfs_mark_buffer_dirty(leaf); +err: + btrfs_free_path(path); + return ret; +} + +static int find_next_chunk(struct btrfs_root *root, u64 *objectid) +{ + struct btrfs_path *path; + int ret; + struct btrfs_key key; + struct btrfs_key found_key; + + path = btrfs_alloc_path(); + BUG_ON(!path); + + key.objectid = (u64)-1; + key.offset = (u64)-1; + key.type = BTRFS_CHUNK_ITEM_KEY; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto error; + + BUG_ON(ret == 0); + + ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY); + if (ret) { + *objectid = 0; + } else { + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + *objectid = found_key.objectid + found_key.offset; + } + ret = 0; +error: + btrfs_free_path(path); + return ret; +} + +static struct btrfs_device *next_device(struct list_head *head, + struct list_head *last) +{ + struct list_head *next = last->next; + struct btrfs_device *dev; + + if (list_empty(head)) + return NULL; + + if (next == head) + next = next->next; + + dev = list_entry(next, struct btrfs_device, dev_list); + return dev; +} + +static int find_next_devid(struct btrfs_root *root, struct btrfs_path *path, + u64 *objectid) +{ + int ret; + struct btrfs_key key; + struct btrfs_key found_key; + + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.type = BTRFS_DEV_ITEM_KEY; + key.offset = (u64)-1; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto error; + + BUG_ON(ret == 0); + + ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID, + BTRFS_DEV_ITEM_KEY); + if (ret) { + *objectid = 1; + } else { + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + *objectid = found_key.offset + 1; + } + 
ret = 0; +error: + btrfs_release_path(root, path); + return ret; +} + +/* + * the device information is stored in the chunk root + * the btrfs_device struct should be fully filled in + */ +int btrfs_add_device(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_device *device) +{ + int ret; + struct btrfs_path *path; + struct btrfs_dev_item *dev_item; + struct extent_buffer *leaf; + struct btrfs_key key; + unsigned long ptr; + u64 free_devid; + + root = root->fs_info->chunk_root; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = find_next_devid(root, path, &free_devid); + if (ret) + goto out; + + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.type = BTRFS_DEV_ITEM_KEY; + key.offset = free_devid; + + ret = btrfs_insert_empty_item(trans, root, path, &key, + sizeof(*dev_item) + device->name_len); + if (ret) + goto out; + + leaf = path->nodes[0]; + dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); + + btrfs_set_device_id(leaf, dev_item, device->devid); + btrfs_set_device_type(leaf, dev_item, device->type); + btrfs_set_device_io_align(leaf, dev_item, device->io_align); + btrfs_set_device_io_width(leaf, dev_item, device->io_width); + btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); + btrfs_set_device_rdev(leaf, dev_item, device->rdev); + btrfs_set_device_partition(leaf, dev_item, device->partition); + btrfs_set_device_name_len(leaf, dev_item, device->name_len); + btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); + btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); + + ptr = (unsigned long)btrfs_device_name(dev_item); + write_extent_buffer(leaf, device->name, ptr, device->name_len); + + ptr = (unsigned long)btrfs_device_uuid(dev_item); + write_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE); + btrfs_mark_buffer_dirty(leaf); + ret = 0; + +out: + btrfs_free_path(path); + return ret; +} +int btrfs_update_device(struct btrfs_trans_handle *trans, + 
struct btrfs_device *device) +{ + int ret; + struct btrfs_path *path; + struct btrfs_root *root; + struct btrfs_dev_item *dev_item; + struct extent_buffer *leaf; + struct btrfs_key key; + + root = device->dev_root->fs_info->chunk_root; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.type = BTRFS_DEV_ITEM_KEY; + key.offset = device->devid; + + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret < 0) + goto out; + + if (ret > 0) { + ret = -ENOENT; + goto out; + } + + leaf = path->nodes[0]; + dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); + + btrfs_set_device_id(leaf, dev_item, device->devid); + btrfs_set_device_type(leaf, dev_item, device->type); + btrfs_set_device_io_align(leaf, dev_item, device->io_align); + btrfs_set_device_io_width(leaf, dev_item, device->io_width); + btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); + btrfs_set_device_rdev(leaf, dev_item, device->rdev); + btrfs_set_device_partition(leaf, dev_item, device->partition); + btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); + btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); + btrfs_mark_buffer_dirty(leaf); + +out: + btrfs_free_path(path); + return ret; +} + +/* + * append one entry (a disk key followed by the chunk item) to the system + * chunk array held in the in-memory super block copy, and grow the recorded + * array size to match. Returns 0 on success or -EFBIG when the key plus + * the item would not fit in the array. + */ +int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_key *key, + struct btrfs_chunk *chunk, int item_size) +{ + struct btrfs_super_block *super_copy = &root->fs_info->super_copy; + struct btrfs_disk_key disk_key; + u32 array_size; + u8 *ptr; + + array_size = btrfs_super_sys_array_size(super_copy); + /* both the key and the item are copied below, so both must fit */ + if (array_size + item_size + sizeof(disk_key) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) + return -EFBIG; + + ptr = super_copy->sys_chunk_array + array_size; + btrfs_cpu_key_to_disk(&disk_key, key); + memcpy(ptr, &disk_key, sizeof(disk_key)); + ptr += sizeof(disk_key); + memcpy(ptr, chunk, item_size); + item_size += sizeof(disk_key); + btrfs_set_super_sys_array_size(super_copy,
array_size + item_size); + return 0; +} + +int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, u64 *start, + u64 *num_bytes, u32 type) +{ + u64 dev_offset; + struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; + struct btrfs_stripe *stripes; + struct btrfs_device *device = NULL; + struct btrfs_chunk *chunk; + struct list_head *dev_list = &extent_root->fs_info->devices; + struct list_head *last_dev = extent_root->fs_info->last_device; + struct map_lookup *map; + u64 physical; + u64 calc_size; + int num_stripes; + int ret; + int index = 0; + struct btrfs_key key; + + + ret = find_next_chunk(chunk_root, &key.objectid); + if (ret) + return ret; + + num_stripes = 1; + chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS); + if (!chunk) + return -ENOMEM; + + stripes = &chunk->stripe; + + while(index < num_stripes) { + device = next_device(dev_list, last_dev); + BUG_ON(!device); + last_dev = &device->dev_list; + extent_root->fs_info->last_device = last_dev; + + if (index == 0) { + int mask = device->io_align; + calc_size = (device->total_bytes * 95) / 100; + calc_size = device->total_bytes - calc_size; + calc_size = (calc_size / mask) * mask; + *num_bytes = calc_size; + } + + ret = btrfs_alloc_dev_extent(trans, device, + key.objectid, + calc_size, &dev_offset); + BUG_ON(ret); + + device->bytes_used += calc_size; + ret = btrfs_update_device(trans, device); + BUG_ON(ret); + + btrfs_set_stack_stripe_devid(stripes + index, device->devid); + btrfs_set_stack_stripe_offset(stripes + index, dev_offset); + physical = dev_offset; + index++; + } + + /* key.objectid was set above */ + key.offset = *num_bytes; + key.type = BTRFS_CHUNK_ITEM_KEY; + btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid); + btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024); + btrfs_set_stack_chunk_type(chunk, type); + btrfs_set_stack_chunk_num_stripes(chunk, num_stripes); + btrfs_set_stack_chunk_io_align(chunk, 
extent_root->sectorsize); + btrfs_set_stack_chunk_io_width(chunk, extent_root->sectorsize); + btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize); + + ret = btrfs_insert_item(trans, chunk_root, &key, chunk, + btrfs_chunk_item_size(num_stripes)); + BUG_ON(ret); + *start = key.objectid; + + map = kmalloc(sizeof(*map), GFP_NOFS); + if (!map) { + /* don't leak the in-memory chunk item on the error paths */ + kfree(chunk); + return -ENOMEM; + } + + map->ce.start = key.objectid; + map->ce.size = key.offset; + + map->physical = physical; + map->dev = device; + + if (!map->dev) { + kfree(map); + kfree(chunk); + return -EIO; + } + ret = insert_existing_cache_extent( + &extent_root->fs_info->mapping_tree.cache_tree, + &map->ce); + BUG_ON(ret); + + kfree(chunk); + return ret; +} + +/* prepare an empty logical->physical mapping tree */ +void btrfs_mapping_init(struct btrfs_mapping_tree *tree) +{ + cache_tree_init(&tree->cache_tree); +} + +/* + * translate a logical byte offset using the cached chunk mappings. + * On return *dev and *phys give the device and the offset on it, and + * *length is the number of bytes left in the mapping starting at logical. + * A logical address with no covering mapping is a BUG. + */ +int btrfs_map_block(struct btrfs_mapping_tree *map_tree, + u64 logical, u64 *phys, u64 *length, + struct btrfs_device **dev) +{ + struct cache_extent *ce; + struct map_lookup *map; + u64 offset; + + ce = find_first_cache_extent(&map_tree->cache_tree, logical); + BUG_ON(!ce); + /* the mapping covers [start, start + size), the end is exclusive */ + BUG_ON(ce->start > logical || ce->start + ce->size <= logical); + map = container_of(ce, struct map_lookup, ce); + offset = logical - ce->start; + *phys = map->physical + offset; + *length = ce->size - offset; + *dev = map->dev; + return 0; +} + +/* walk the list of known devices and return the one with a matching devid, or NULL */ +struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid) +{ + struct btrfs_device *dev; + struct list_head *cur = root->fs_info->devices.next; + struct list_head *head = &root->fs_info->devices; + + while(cur != head) { + dev = list_entry(cur, struct btrfs_device, dev_list); + if (dev->devid == devid) + return dev; + cur = cur->next; + } + return NULL; +} + +static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, + struct extent_buffer *leaf, + struct btrfs_chunk *chunk) +{ + struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; + struct map_lookup *map; + struct cache_extent *ce; + u64 logical; + u64 length; + u64
devid; + int ret; + + logical = key->objectid; + length = key->offset; + ce = find_first_cache_extent(&map_tree->cache_tree, logical); + + /* already mapped? */ + if (ce && ce->start <= logical && ce->start + ce->size > logical) { + return 0; + } + + map = kmalloc(sizeof(*map), GFP_NOFS); + if (!map) + return -ENOMEM; + + map->ce.start = logical; + map->ce.size = length; + + map->physical = btrfs_stripe_offset_nr(leaf, chunk, 0); + devid = btrfs_stripe_devid_nr(leaf, chunk, 0); + map->dev = btrfs_find_device(root, devid); + + if (!map->dev) { + kfree(map); + return -EIO; + } + ret = insert_existing_cache_extent(&map_tree->cache_tree, &map->ce); + BUG_ON(ret); + + return 0; +} + +static int fill_device_from_item(struct extent_buffer *leaf, + struct btrfs_dev_item *dev_item, + struct btrfs_device *device) +{ + unsigned long ptr; + char *name; + + device->devid = btrfs_device_id(leaf, dev_item); + device->total_bytes = btrfs_device_total_bytes(leaf, dev_item); + device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); + device->type = btrfs_device_type(leaf, dev_item); + device->io_align = btrfs_device_io_align(leaf, dev_item); + device->io_width = btrfs_device_io_width(leaf, dev_item); + device->sector_size = btrfs_device_sector_size(leaf, dev_item); + device->rdev = btrfs_device_rdev(leaf, dev_item); + device->partition = btrfs_device_partition(leaf, dev_item); + device->name_len = btrfs_device_name_len(leaf, dev_item); + + ptr = (unsigned long)btrfs_device_uuid(dev_item); + read_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE); + + name = kmalloc(device->name_len + 1, GFP_NOFS); + if (!name) + return -ENOMEM; + device->name = name; + ptr = (unsigned long)btrfs_device_name(dev_item); + read_extent_buffer(leaf, name, ptr, device->name_len); + name[device->name_len] = '\0'; + return 0; +} + +static int read_one_dev(struct btrfs_root *root, struct btrfs_key *key, + struct extent_buffer *leaf, + struct btrfs_dev_item *dev_item) +{ + struct btrfs_device 
*device; + u64 devid; + int ret; + + /* nothing to do when this devid is already on the device list */ + devid = btrfs_device_id(leaf, dev_item); + if (btrfs_find_device(root, devid)) + return 0; + + device = kmalloc(sizeof(*device), GFP_NOFS); + if (!device) + return -ENOMEM; + + /* on failure device->name was never set, so only the struct is freed */ + ret = fill_device_from_item(leaf, dev_item, device); + if (ret) { + kfree(device); + return ret; + } + device->dev_root = root->fs_info->dev_root; + device->fd = 0; + list_add(&device->dev_list, &root->fs_info->devices); + memcpy(&device->dev_key, key, sizeof(*key)); + + ret = btrfs_open_device(device); + if (ret) { + /* unlink first so the device list never holds a freed entry */ + list_del(&device->dev_list); + kfree(device->name); + kfree(device); + } + return ret; +} + +int btrfs_read_sys_array(struct btrfs_root *root) +{ + struct btrfs_super_block *super_copy = &root->fs_info->super_copy; + struct extent_buffer *sb = root->fs_info->sb_buffer; + struct btrfs_disk_key *disk_key; + struct btrfs_dev_item *dev_item; + struct btrfs_chunk *chunk; + struct btrfs_key key; + u32 num_stripes; + u32 array_size; + u32 len = 0; + u8 *ptr; + unsigned long sb_ptr; + u32 cur; + int ret; + int dev_only = 1; + + array_size = btrfs_super_sys_array_size(super_copy); + + /* + * we do this loop twice, once for the device items and + * once for all of the chunks.
This way there are device + * structs filled in for every chunk + */ +again: + ptr = super_copy->sys_chunk_array; + sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array); + cur = 0; + + while (cur < array_size) { + disk_key = (struct btrfs_disk_key *)ptr; + btrfs_disk_key_to_cpu(&key, disk_key); + + len = sizeof(*disk_key); + ptr += len; + sb_ptr += len; + cur += len; + + if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID && + key.type == BTRFS_DEV_ITEM_KEY) { + dev_item = (struct btrfs_dev_item *)sb_ptr; + if (dev_only) { + ret = read_one_dev(root, &key, sb, dev_item); + BUG_ON(ret); + } + len = sizeof(*dev_item); + len += btrfs_device_name_len(sb, dev_item); + } else if (key.type == BTRFS_CHUNK_ITEM_KEY) { + + chunk = (struct btrfs_chunk *)sb_ptr; + if (!dev_only) { + ret = read_one_chunk(root, &key, sb, chunk); + BUG_ON(ret); + } + num_stripes = btrfs_chunk_num_stripes(sb, chunk); + len = btrfs_chunk_item_size(num_stripes); + } else { + BUG(); + } + ptr += len; + sb_ptr += len; + cur += len; + } + if (dev_only == 1) { + dev_only = 0; + goto again; + } + return 0; +} + +int btrfs_read_chunk_tree(struct btrfs_root *root) +{ + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_key key; + struct btrfs_key found_key; + int ret; + int slot; + + root = root->fs_info->chunk_root; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + /* first we search for all of the device items, and then we + * read in all of the chunk items. 
This way we can create chunk + * mappings that reference all of the devices that are found + */ + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.offset = 0; + key.type = 0; +again: + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + /* the path is not usable after a failed search */ + if (ret < 0) + goto error; + while(1) { + leaf = path->nodes[0]; + slot = path->slots[0]; + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret == 0) + continue; + if (ret < 0) + goto error; + break; + } + btrfs_item_key_to_cpu(leaf, &found_key, slot); + if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) { + if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID) + break; + if (found_key.type == BTRFS_DEV_ITEM_KEY) { + struct btrfs_dev_item *dev_item; + dev_item = btrfs_item_ptr(leaf, slot, + struct btrfs_dev_item); + ret = read_one_dev(root, &found_key, leaf, + dev_item); + BUG_ON(ret); + } + } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) { + struct btrfs_chunk *chunk; + chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); + ret = read_one_chunk(root, &found_key, leaf, chunk); + } + path->slots[0]++; + } + /* the device pass is done, restart from objectid 0 for the chunk pass */ + if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) { + key.objectid = 0; + btrfs_release_path(root, path); + goto again; + } + + ret = 0; +error: + /* free the path on the success and the failure paths alike */ + btrfs_free_path(path); + return ret; +} + diff --git a/volumes.h b/volumes.h new file mode 100644 index 0000000..fb29b54 --- /dev/null +++ b/volumes.h @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details.
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __BTRFS_VOLUMES_ +#define __BTRFS_VOLUMES_ +struct btrfs_device { + struct list_head dev_list; + struct btrfs_root *dev_root; + struct btrfs_key dev_key; + + int fd; + + /* the internal btrfs device id */ + u64 devid; + + /* size of the device */ + u64 total_bytes; + + /* bytes used */ + u64 bytes_used; + + /* optimal io alignment for this device */ + u32 io_align; + + /* optimal io width for this device */ + u32 io_width; + + /* minimal io size for this device */ + u32 sector_size; + + /* the kernel device number */ + u64 rdev; + + /* type and info about this device */ + u64 type; + + /* partition number, 0 for whole dev */ + int partition; + + /* length of the name data at the end of the item */ + int name_len; + + /* physical drive uuid (or lvm uuid) */ + u8 uuid[BTRFS_DEV_UUID_SIZE]; + + char *name; +}; + +int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, + struct btrfs_device *device, + u64 owner, u64 num_bytes, u64 *start); +int btrfs_map_block(struct btrfs_mapping_tree *map_tree, + u64 logical, u64 *phys, u64 *length, + struct btrfs_device **dev); +int btrfs_read_sys_array(struct btrfs_root *root); +int btrfs_read_chunk_tree(struct btrfs_root *root); +int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, u64 *start, + u64 *num_bytes, u32 type); +#endif -- 2.7.4