X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=disk-io.c;h=3647ecca7e630aa22a7cec319c5b2290365e0f5b;hb=dad817d3bad44619c355d11e39512b413da6939c;hp=a8176a53ac2fbef9130420a0d8be45eab97a7e5b;hpb=d25aa23b0236418e891e1db66562035891a8faa8;p=platform%2Fupstream%2Fbtrfs-progs.git diff --git a/disk-io.c b/disk-io.c index a8176a5..3647ecc 100644 --- a/disk-io.c +++ b/disk-io.c @@ -16,15 +16,13 @@ * Boston, MA 021110-1307, USA. */ -#define _XOPEN_SOURCE 600 -#define __USE_XOPEN2K -#define _GNU_SOURCE 1 #include #include #include #include #include #include +#include #include "kerncompat.h" #include "radix-tree.h" #include "ctree.h" @@ -34,23 +32,51 @@ #include "crc32c.h" #include "utils.h" #include "print-tree.h" +#include "rbtree-utils.h" -static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) +/* specified errno for check_tree_block */ +#define BTRFS_BAD_BYTENR (-1) +#define BTRFS_BAD_FSID (-2) +#define BTRFS_BAD_LEVEL (-3) +#define BTRFS_BAD_NRITEMS (-4) + +/* Calculate max possible nritems for a leaf/node */ +static u32 max_nritems(u8 level, u32 nodesize) { - struct btrfs_fs_devices *fs_devices; - int ret = 1; + if (level == 0) + return ((nodesize - sizeof(struct btrfs_header)) / + sizeof(struct btrfs_item)); + return ((nodesize - sizeof(struct btrfs_header)) / + sizeof(struct btrfs_key_ptr)); +} - if (buf->start != btrfs_header_bytenr(buf)) { - printk("Check tree block failed, want=%Lu, have=%Lu\n", - buf->start, btrfs_header_bytenr(buf)); - return ret; - } +static int check_tree_block(struct btrfs_fs_info *fs_info, + struct extent_buffer *buf) +{ - fs_devices = root->fs_info->fs_devices; + struct btrfs_fs_devices *fs_devices; + u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); + int ret = BTRFS_BAD_FSID; + + if (buf->start != btrfs_header_bytenr(buf)) + return BTRFS_BAD_BYTENR; + if (btrfs_header_level(buf) >= BTRFS_MAX_LEVEL) + return BTRFS_BAD_LEVEL; + if (btrfs_header_nritems(buf) > max_nritems(btrfs_header_level(buf), + nodesize)) + return BTRFS_BAD_NRITEMS; + + /* Only leaf can be empty */ + if (btrfs_header_nritems(buf) == 0 && + btrfs_header_level(buf) != 0) + return BTRFS_BAD_NRITEMS; + + fs_devices = fs_info->fs_devices; while (fs_devices) { - if (!memcmp_extent_buffer(buf, fs_devices->fsid, - (unsigned long)btrfs_header_fsid(buf), + if (fs_info->ignore_fsid_mismatch || + !memcmp_extent_buffer(buf, fs_devices->fsid, + btrfs_header_fsid(), BTRFS_FSID_SIZE)) { ret = 0; break; @@ -60,6 +86,38 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) return ret; } +static void print_tree_block_error(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, + int err) +{ + char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'}; + char found_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'}; + u8 buf[BTRFS_UUID_SIZE]; + + switch (err) { + case BTRFS_BAD_FSID: + read_extent_buffer(eb, buf, btrfs_header_fsid(), + BTRFS_UUID_SIZE); + uuid_unparse(buf, found_uuid); + uuid_unparse(fs_info->fsid, fs_uuid); + fprintf(stderr, "fsid mismatch, want=%s, have=%s\n", + fs_uuid, found_uuid); + break; + case BTRFS_BAD_BYTENR: + fprintf(stderr, "bytenr mismatch, want=%llu, have=%llu\n", + eb->start, btrfs_header_bytenr(eb)); + break; + case BTRFS_BAD_LEVEL: + fprintf(stderr, "bad level, %u > %u\n", + btrfs_header_level(eb), BTRFS_MAX_LEVEL); + break; + case BTRFS_BAD_NRITEMS: + fprintf(stderr, "invalid nr_items: %u\n", + btrfs_header_nritems(eb)); + break; + } +} + u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) { return crc32c(seed, data, len); @@ -67,47 +125,61 @@ u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) void btrfs_csum_final(u32 crc, char *result) { - *(__le32 *)result = ~cpu_to_le32(crc); + put_unaligned_le32(~crc, result); } -int csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, - int verify) +static int __csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, + int verify, int silent) { - char *result; + char result[BTRFS_CSUM_SIZE]; u32 len; u32 crc = ~(u32)0; - result = malloc(csum_size * sizeof(char)); - if (!result) - return 1; - len = buf->len - BTRFS_CSUM_SIZE; crc = crc32c(crc, buf->data + BTRFS_CSUM_SIZE, len); btrfs_csum_final(crc, result); if (verify) { if (memcmp_extent_buffer(buf, result, 0, csum_size)) { - printk("checksum verify failed on %llu found %08X " - "wanted %08X\n", (unsigned long long)buf->start, - *((u32 *)result), *((u32*)(char *)buf->data)); - free(result); + if (!silent) + printk("checksum verify failed on %llu found %08X wanted %08X\n", + (unsigned long long)buf->start, + *((u32 *)result), + *((u32*)(char *)buf->data)); return 1; } } else { write_extent_buffer(buf, result, 0, csum_size); } - free(result); return 0; } -int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, - int verify) +int csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, int verify) +{ + return __csum_tree_block_size(buf, csum_size, verify, 0); +} + +int verify_tree_block_csum_silent(struct extent_buffer *buf, u16 csum_size) +{ + return __csum_tree_block_size(buf, csum_size, 1, 1); +} + +static int csum_tree_block_fs_info(struct btrfs_fs_info *fs_info, + struct extent_buffer *buf, int verify) { u16 csum_size = - btrfs_super_csum_size(root->fs_info->super_copy); + btrfs_super_csum_size(fs_info->super_copy); + if (verify && fs_info->suppress_check_block_errors) + return verify_tree_block_csum_silent(buf, csum_size); return csum_tree_block_size(buf, csum_size, verify); } +int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, + int verify) +{ + return csum_tree_block_fs_info(root->fs_info, buf, verify); +} + struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { @@ -115,38 +187,32 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, bytenr, blocksize); } -struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, - u64 bytenr, u32 blocksize) +struct extent_buffer* btrfs_find_create_tree_block( + struct btrfs_fs_info *fs_info, u64 bytenr, u32 blocksize) { - return alloc_extent_buffer(&root->fs_info->extent_cache, bytenr, - blocksize); + return alloc_extent_buffer(&fs_info->extent_cache, bytenr, blocksize); } -int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, - u64 parent_transid) +void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, + u64 parent_transid) { - int ret; struct extent_buffer *eb; u64 length; struct btrfs_multi_bio *multi = NULL; struct btrfs_device *device; eb = btrfs_find_tree_block(root, bytenr, blocksize); - if (eb && btrfs_buffer_uptodate(eb, parent_transid)) { - free_extent_buffer(eb); - return 0; + if (!(eb && btrfs_buffer_uptodate(eb, parent_transid)) && + !btrfs_map_block(&root->fs_info->mapping_tree, READ, + bytenr, &length, &multi, 0, NULL)) { + device = multi->stripes[0].dev; + device->total_ios++; + blocksize = min(blocksize, (u32)(64 * 1024)); + readahead(device->fd, multi->stripes[0].physical, blocksize); } - length = blocksize; - ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, - bytenr, &length, &multi, 0, NULL); - BUG_ON(ret); - device = multi->stripes[0].dev; - device->total_ios++; - blocksize = min(blocksize, (u32)(64 * 1024)); - readahead(device->fd, multi->stripes[0].physical, blocksize); + free_extent_buffer(eb); kfree(multi); - return 0; } static int verify_parent_transid(struct extent_io_tree *io_tree, @@ -168,6 +234,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, (unsigned long long)parent_transid, (unsigned long long)btrfs_header_generation(eb)); if (ignore) { + eb->flags |= EXTENT_BAD_TRANSID; printk("Ignoring transid failure\n"); return 0; } @@ -180,7 +247,7 @@ out: } -static int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror) +int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror) { unsigned long offset = 0; struct btrfs_multi_bio *multi = NULL; @@ -191,26 +258,41 @@ static int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, i while (bytes_left) { read_len = bytes_left; - ret = btrfs_map_block(&info->mapping_tree, READ, - eb->start + offset, &read_len, &multi, - mirror, NULL); - if (ret) { - printk("Couldn't map the block %Lu\n", eb->start + offset); - kfree(multi); - return -EIO; - } - device = multi->stripes[0].dev; + device = NULL; + + if (!info->on_restoring && + eb->start != BTRFS_SUPER_INFO_OFFSET) { + ret = btrfs_map_block(&info->mapping_tree, READ, + eb->start + offset, &read_len, &multi, + mirror, NULL); + if (ret) { + printk("Couldn't map the block %Lu\n", eb->start + offset); + kfree(multi); + return -EIO; + } + device = multi->stripes[0].dev; + + if (device->fd <= 0) { + kfree(multi); + return -EIO; + } - if (device->fd == 0) { + eb->fd = device->fd; + device->total_ios++; + eb->dev_bytenr = multi->stripes[0].physical; kfree(multi); - return -EIO; - } + multi = NULL; + } else { + /* special case for restore metadump */ + list_for_each_entry(device, &info->fs_devices->devices, dev_list) { + if (device->devid == 1) + break; + } - eb->fd = device->fd; - device->total_ios++; - eb->dev_bytenr = multi->stripes[0].physical; - kfree(multi); - multi = NULL; + eb->fd = device->fd; + eb->dev_bytenr = eb->start; + device->total_ios++; + } if (read_len > bytes_left) read_len = bytes_left; @@ -224,8 +306,9 @@ static int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, i return 0; } -struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, - u32 blocksize, u64 parent_transid) +struct extent_buffer* read_tree_block_fs_info( + struct btrfs_fs_info *fs_info, u64 bytenr, u32 blocksize, + u64 parent_transid) { int ret; struct extent_buffer *eb; @@ -235,36 +318,47 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, int num_copies; int ignore = 0; - eb = btrfs_find_create_tree_block(root, bytenr, blocksize); + eb = btrfs_find_create_tree_block(fs_info, bytenr, blocksize); if (!eb) - return NULL; + return ERR_PTR(-ENOMEM); if (btrfs_buffer_uptodate(eb, parent_transid)) return eb; while (1) { - ret = read_whole_eb(root->fs_info, eb, mirror_num); - if (ret == 0 && check_tree_block(root, eb) == 0 && - csum_tree_block(root, eb, 1) == 0 && + ret = read_whole_eb(fs_info, eb, mirror_num); + if (ret == 0 && csum_tree_block_fs_info(fs_info, eb, 1) == 0 && + check_tree_block(fs_info, eb) == 0 && verify_parent_transid(eb->tree, eb, parent_transid, ignore) == 0) { + if (eb->flags & EXTENT_BAD_TRANSID && + list_empty(&eb->recow)) { + list_add_tail(&eb->recow, + &fs_info->recow_ebs); + eb->refs++; + } btrfs_set_buffer_uptodate(eb); return eb; } if (ignore) { - if (check_tree_block(root, eb)) - printk("read block failed check_tree_block\n"); - else - printk("Csum didn't match\n"); + if (check_tree_block(fs_info, eb)) { + if (!fs_info->suppress_check_block_errors) + print_tree_block_error(fs_info, eb, + check_tree_block(fs_info, eb)); + } else { + if (!fs_info->suppress_check_block_errors) + fprintf(stderr, "Csum didn't match\n"); + } + ret = -EIO; break; } - num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, + num_copies = btrfs_num_copies(&fs_info->mapping_tree, eb->start, eb->len); if (num_copies == 1) { ignore = 1; continue; } - if (btrfs_header_generation(eb) > best_transid) { + if (btrfs_header_generation(eb) > best_transid && mirror_num) { best_transid = btrfs_header_generation(eb); good_mirror = mirror_num; } @@ -276,153 +370,45 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, } } free_extent_buffer(eb); - return NULL; -} - -static int rmw_eb(struct btrfs_fs_info *info, - struct extent_buffer *eb, struct extent_buffer *orig_eb) -{ - int ret; - unsigned long orig_off = 0; - unsigned long dest_off = 0; - unsigned long copy_len = eb->len; - - ret = read_whole_eb(info, eb, 0); - if (ret) - return ret; - - if (eb->start + eb->len <= orig_eb->start || - eb->start >= orig_eb->start + orig_eb->len) - return 0; - /* - * | ----- orig_eb ------- | - * | ----- stripe ------- | - * | ----- orig_eb ------- | - * | ----- orig_eb ------- | - */ - if (eb->start > orig_eb->start) - orig_off = eb->start - orig_eb->start; - if (orig_eb->start > eb->start) - dest_off = orig_eb->start - eb->start; - - if (copy_len > orig_eb->len - orig_off) - copy_len = orig_eb->len - orig_off; - if (copy_len > eb->len - dest_off) - copy_len = eb->len - dest_off; - - memcpy(eb->data + dest_off, orig_eb->data + orig_off, copy_len); - return 0; + return ERR_PTR(ret); } -static void split_eb_for_raid56(struct btrfs_fs_info *info, - struct extent_buffer *orig_eb, - struct extent_buffer **ebs, - u64 stripe_len, u64 *raid_map, - int num_stripes) +int read_extent_data(struct btrfs_root *root, char *data, + u64 logical, u64 *len, int mirror) { - struct extent_buffer *eb; - u64 start = orig_eb->start; - u64 this_eb_start; - int i; - int ret; - - for (i = 0; i < num_stripes; i++) { - if (raid_map[i] >= BTRFS_RAID5_P_STRIPE) - break; - - eb = malloc(sizeof(struct extent_buffer) + stripe_len); - if (!eb) - BUG(); - memset(eb, 0, sizeof(struct extent_buffer) + stripe_len); - - eb->start = raid_map[i]; - eb->len = stripe_len; - eb->refs = 1; - eb->flags = 0; - eb->fd = -1; - eb->dev_bytenr = (u64)-1; - - this_eb_start = raid_map[i]; + u64 offset = 0; + struct btrfs_multi_bio *multi = NULL; + struct btrfs_fs_info *info = root->fs_info; + struct btrfs_device *device; + int ret = 0; + u64 max_len = *len; - if (start > this_eb_start || - start + orig_eb->len < this_eb_start + stripe_len) { - ret = rmw_eb(info, eb, orig_eb); - BUG_ON(ret); - } else { - memcpy(eb->data, orig_eb->data + eb->start - start, stripe_len); - } - ebs[i] = eb; + ret = btrfs_map_block(&info->mapping_tree, READ, logical, len, + &multi, mirror, NULL); + if (ret) { + fprintf(stderr, "Couldn't map the block %llu\n", + logical + offset); + goto err; } -} - -static int write_raid56_with_parity(struct btrfs_fs_info *info, - struct extent_buffer *eb, - struct btrfs_multi_bio *multi, - u64 stripe_len, u64 *raid_map) -{ - struct extent_buffer *ebs[multi->num_stripes], *p_eb = NULL, *q_eb = NULL; - int i; - int j; - int ret; - int alloc_size = eb->len; - - if (stripe_len > alloc_size) - alloc_size = stripe_len; - - split_eb_for_raid56(info, eb, ebs, stripe_len, raid_map, - multi->num_stripes); + device = multi->stripes[0].dev; - for (i = 0; i < multi->num_stripes; i++) { - struct extent_buffer *new_eb; - if (raid_map[i] < BTRFS_RAID5_P_STRIPE) { - ebs[i]->dev_bytenr = multi->stripes[i].physical; - ebs[i]->fd = multi->stripes[i].dev->fd; - multi->stripes[i].dev->total_ios++; - BUG_ON(ebs[i]->start != raid_map[i]); - continue; - } - new_eb = kmalloc(sizeof(*eb) + alloc_size, GFP_NOFS); - BUG_ON(!new_eb); - new_eb->dev_bytenr = multi->stripes[i].physical; - new_eb->fd = multi->stripes[i].dev->fd; - multi->stripes[i].dev->total_ios++; - new_eb->len = stripe_len; - - if (raid_map[i] == BTRFS_RAID5_P_STRIPE) - p_eb = new_eb; - else if (raid_map[i] == BTRFS_RAID6_Q_STRIPE) - q_eb = new_eb; - } - if (q_eb) { - void *pointers[multi->num_stripes]; - ebs[multi->num_stripes - 2] = p_eb; - ebs[multi->num_stripes - 1] = q_eb; - - for (i = 0; i < multi->num_stripes; i++) - pointers[i] = ebs[i]->data; - - raid6_gen_syndrome(multi->num_stripes, stripe_len, pointers); - } else { - ebs[multi->num_stripes - 1] = p_eb; - memcpy(p_eb->data, ebs[0]->data, stripe_len); - for (j = 1; j < multi->num_stripes - 1; j++) { - for (i = 0; i < stripe_len; i += sizeof(unsigned long)) { - *(unsigned long *)(p_eb->data + i) ^= - *(unsigned long *)(ebs[j]->data + i); - } - } - } + if (device->fd <= 0) + goto err; + if (*len > max_len) + *len = max_len; - for (i = 0; i < multi->num_stripes; i++) { - ret = write_extent_to_disk(ebs[i]); - BUG_ON(ret); - if (ebs[i] != eb) - kfree(ebs[i]); - } - return 0; + ret = pread64(device->fd, data, *len, multi->stripes[0].physical); + if (ret != *len) + ret = -EIO; + else + ret = 0; +err: + kfree(multi); + return ret; } -int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, +int write_and_map_eb(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *eb) { int ret; @@ -431,14 +417,6 @@ int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 *raid_map = NULL; struct btrfs_multi_bio *multi = NULL; - if (check_tree_block(root, eb)) - BUG(); - if (!btrfs_buffer_uptodate(eb, trans->transid)) - BUG(); - - btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); - csum_tree_block(root, eb, 0); - dev_nr = 0; length = eb->len; ret = btrfs_map_block(&root->fs_info->mapping_tree, WRITE, @@ -457,10 +435,30 @@ int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = write_extent_to_disk(eb); BUG_ON(ret); } + kfree(raid_map); kfree(multi); return 0; } +int write_tree_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *eb) +{ + if (check_tree_block(root->fs_info, eb)) { + print_tree_block_error(root->fs_info, eb, + check_tree_block(root->fs_info, eb)); + BUG(); + } + + if (trans && !btrfs_buffer_uptodate(eb, trans->transid)) + BUG(); + + btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); + csum_tree_block(root, eb, 0); + + return write_and_map_eb(trans, root, eb); +} + int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, u32 stripesize, struct btrfs_root *root, struct btrfs_fs_info *fs_info, u64 objectid) @@ -481,6 +479,7 @@ int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->last_inode_alloc = 0; INIT_LIST_HEAD(&root->dirty_list); + INIT_LIST_HEAD(&root->orphan_data_extents); memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); root->root_key.objectid = objectid; @@ -579,6 +578,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, if (root->commit_root == root->node) goto commit_tree; + if (root == root->fs_info->tree_root) + goto commit_tree; + if (root == root->fs_info->chunk_root) + goto commit_tree; free_extent_buffer(root->commit_root); root->commit_root = NULL; @@ -621,7 +624,7 @@ static int find_and_setup_root(struct btrfs_root *tree_root, if (ret) return ret; - blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); + blocksize = root->nodesize; generation = btrfs_root_generation(&root->root_item); root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), blocksize, generation); @@ -647,8 +650,7 @@ static int find_and_setup_log_root(struct btrfs_root *tree_root, return 0; } - blocksize = btrfs_level_size(tree_root, - btrfs_super_log_root_level(disk_super)); + blocksize = tree_root->nodesize; __setup_root(tree_root->nodesize, tree_root->leafsize, tree_root->sectorsize, tree_root->stripesize, @@ -670,9 +672,7 @@ static int find_and_setup_log_root(struct btrfs_root *tree_root, return 0; } - -int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, - struct btrfs_root *root) +int btrfs_free_fs_root(struct btrfs_root *root) { if (root->node) free_extent_buffer(root->node); @@ -682,22 +682,16 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, return 0; } -static int free_fs_roots(struct btrfs_fs_info *fs_info) +static void __free_fs_root(struct rb_node *node) { - struct cache_extent *cache; struct btrfs_root *root; - while (1) { - cache = find_first_cache_extent(&fs_info->fs_root_cache, 0); - if (!cache) - break; - root = container_of(cache, struct btrfs_root, cache); - remove_cache_extent(&fs_info->fs_root_cache, cache); - btrfs_free_fs_root(fs_info, root); - } - return 0; + root = container_of(node, struct btrfs_root, rb_node); + btrfs_free_fs_root(root); } +FREE_RB_BASED_TREE(fs_roots, __free_fs_root); + struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info, struct btrfs_key *location) { @@ -709,10 +703,9 @@ struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info, u32 blocksize; int ret = 0; - root = malloc(sizeof(*root)); + root = calloc(1, sizeof(*root)); if (!root) return ERR_PTR(-ENOMEM); - memset(root, 0, sizeof(*root)); if (location->offset == (u64)-1) { ret = find_and_setup_root(tree_root, fs_info, location->objectid, root); @@ -742,28 +735,55 @@ struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info, memcpy(&root->root_key, location, sizeof(*location)); ret = 0; out: - btrfs_release_path(root, path); btrfs_free_path(path); if (ret) { free(root); return ERR_PTR(ret); } generation = btrfs_root_generation(&root->root_item); - blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); + blocksize = root->nodesize; root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), blocksize, generation); - BUG_ON(!root->node); + if (!extent_buffer_uptodate(root->node)) { + free(root); + return ERR_PTR(-EIO); + } insert: root->ref_cows = 1; return root; } +static int btrfs_fs_roots_compare_objectids(struct rb_node *node, + void *data) +{ + u64 objectid = *((u64 *)data); + struct btrfs_root *root; + + root = rb_entry(node, struct btrfs_root, rb_node); + if (objectid > root->objectid) + return 1; + else if (objectid < root->objectid) + return -1; + else + return 0; +} + +static int btrfs_fs_roots_compare_roots(struct rb_node *node1, + struct rb_node *node2) +{ + struct btrfs_root *root; + + root = rb_entry(node2, struct btrfs_root, rb_node); + return btrfs_fs_roots_compare_objectids(node1, (void *)&root->objectid); +} + struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location) { struct btrfs_root *root; - struct cache_extent *cache; + struct rb_node *node; int ret; + u64 objectid = location->objectid; if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) return fs_info->tree_root; @@ -775,82 +795,63 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, return fs_info->dev_root; if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) return fs_info->csum_root; + if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID) + return fs_info->quota_root; BUG_ON(location->objectid == BTRFS_TREE_RELOC_OBJECTID || location->offset != (u64)-1); - cache = find_cache_extent(&fs_info->fs_root_cache, - location->objectid, 1); - if (cache) - return container_of(cache, struct btrfs_root, cache); + node = rb_search(&fs_info->fs_root_tree, (void *)&objectid, + btrfs_fs_roots_compare_objectids, NULL); + if (node) + return container_of(node, struct btrfs_root, rb_node); root = btrfs_read_fs_root_no_cache(fs_info, location); if (IS_ERR(root)) return root; - root->cache.start = location->objectid; - root->cache.size = 1; - ret = insert_existing_cache_extent(&fs_info->fs_root_cache, - &root->cache); + ret = rb_insert(&fs_info->fs_root_tree, &root->rb_node, + btrfs_fs_roots_compare_roots); BUG_ON(ret); return root; } -static struct btrfs_fs_info *__open_ctree_fd(int fp, const char *path, - u64 sb_bytenr, - u64 root_tree_bytenr, int writes, - int partial) +void btrfs_free_fs_info(struct btrfs_fs_info *fs_info) { - u32 sectorsize; - u32 nodesize; - u32 leafsize; - u32 blocksize; - u32 stripesize; - u64 generation; - struct btrfs_key key; - struct btrfs_root *tree_root = malloc(sizeof(struct btrfs_root)); - struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root)); - struct btrfs_root *chunk_root = malloc(sizeof(struct btrfs_root)); - struct btrfs_root *dev_root = malloc(sizeof(struct btrfs_root)); - struct btrfs_root *csum_root = malloc(sizeof(struct btrfs_root)); - struct btrfs_fs_info *fs_info = malloc(sizeof(*fs_info)); - int ret; - struct btrfs_super_block *disk_super; - struct btrfs_fs_devices *fs_devices = NULL; - u64 total_devs; - u64 features; - - if (sb_bytenr == 0) - sb_bytenr = BTRFS_SUPER_INFO_OFFSET; - - /* try to drop all the caches */ - if (posix_fadvise(fp, 0, 0, POSIX_FADV_DONTNEED)) - fprintf(stderr, "Warning, could not drop caches\n"); - - ret = btrfs_scan_one_device(fp, path, &fs_devices, - &total_devs, sb_bytenr); + free(fs_info->tree_root); + free(fs_info->extent_root); + free(fs_info->chunk_root); + free(fs_info->dev_root); + free(fs_info->csum_root); + free(fs_info->quota_root); + free(fs_info->free_space_root); + free(fs_info->super_copy); + free(fs_info->log_root_tree); + free(fs_info); +} - if (ret) { - fprintf(stderr, "No valid Btrfs found on %s\n", path); - goto out; - } +struct btrfs_fs_info *btrfs_new_fs_info(int writable, u64 sb_bytenr) +{ + struct btrfs_fs_info *fs_info; - if (total_devs != 1) { - ret = btrfs_scan_for_fsid(fs_devices, total_devs, 1); - if (ret) - goto out; - } + fs_info = calloc(1, sizeof(struct btrfs_fs_info)); + if (!fs_info) + return NULL; - memset(fs_info, 0, sizeof(*fs_info)); + fs_info->tree_root = calloc(1, sizeof(struct btrfs_root)); + fs_info->extent_root = calloc(1, sizeof(struct btrfs_root)); + fs_info->chunk_root = calloc(1, sizeof(struct btrfs_root)); + fs_info->dev_root = calloc(1, sizeof(struct btrfs_root)); + fs_info->csum_root = calloc(1, sizeof(struct btrfs_root)); + fs_info->quota_root = calloc(1, sizeof(struct btrfs_root)); + fs_info->free_space_root = calloc(1, sizeof(struct btrfs_root)); fs_info->super_copy = calloc(1, BTRFS_SUPER_INFO_SIZE); - fs_info->tree_root = tree_root; - fs_info->extent_root = extent_root; - fs_info->chunk_root = chunk_root; - fs_info->dev_root = dev_root; - fs_info->csum_root = csum_root; - if (!writes) - fs_info->readonly = 1; + if (!fs_info->tree_root || !fs_info->extent_root || + !fs_info->chunk_root || !fs_info->dev_root || + !fs_info->csum_root || !fs_info->quota_root || + !fs_info->free_space_root || !fs_info->super_copy) + goto free_all; extent_io_tree_init(&fs_info->extent_cache); extent_io_tree_init(&fs_info->free_space_cache); @@ -858,162 +859,217 @@ static struct btrfs_fs_info *__open_ctree_fd(int fp, const char *path, extent_io_tree_init(&fs_info->pinned_extents); extent_io_tree_init(&fs_info->pending_del); extent_io_tree_init(&fs_info->extent_ins); - cache_tree_init(&fs_info->fs_root_cache); + fs_info->excluded_extents = NULL; + fs_info->fs_root_tree = RB_ROOT; cache_tree_init(&fs_info->mapping_tree.cache_tree); mutex_init(&fs_info->fs_mutex); - fs_info->fs_devices = fs_devices; INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); INIT_LIST_HEAD(&fs_info->space_info); + INIT_LIST_HEAD(&fs_info->recow_ebs); - __setup_root(4096, 4096, 4096, 4096, tree_root, - fs_info, BTRFS_ROOT_TREE_OBJECTID); - - if (writes) - ret = btrfs_open_devices(fs_devices, O_RDWR); - else - ret = btrfs_open_devices(fs_devices, O_RDONLY); - if (ret) - goto out_cleanup; + if (!writable) + fs_info->readonly = 1; fs_info->super_bytenr = sb_bytenr; - disk_super = fs_info->super_copy; - ret = btrfs_read_dev_super(fs_devices->latest_bdev, - disk_super, sb_bytenr); - if (ret) { - printk("No valid btrfs found\n"); - goto out_devices; - } - - memcpy(fs_info->fsid, &disk_super->fsid, BTRFS_FSID_SIZE); + fs_info->data_alloc_profile = (u64)-1; + fs_info->metadata_alloc_profile = (u64)-1; + fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; + return fs_info; +free_all: + btrfs_free_fs_info(fs_info); + return NULL; +} +int btrfs_check_fs_compatibility(struct btrfs_super_block *sb, int writable) +{ + u64 features; - features = btrfs_super_incompat_flags(disk_super) & + features = btrfs_super_incompat_flags(sb) & ~BTRFS_FEATURE_INCOMPAT_SUPP; if (features) { printk("couldn't open because of unsupported " "option features (%Lx).\n", (unsigned long long)features); - goto out_devices; + return -ENOTSUP; } - features = btrfs_super_incompat_flags(disk_super); + features = btrfs_super_incompat_flags(sb); if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) { features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; - btrfs_set_super_incompat_flags(disk_super, features); + btrfs_set_super_incompat_flags(sb, features); } - features = btrfs_super_compat_ro_flags(disk_super) & + features = btrfs_super_compat_ro_flags(sb) & ~BTRFS_FEATURE_COMPAT_RO_SUPP; - if (writes && features) { + if (writable && features) { printk("couldn't open RDWR because of unsupported " "option features (%Lx).\n", (unsigned long long)features); - goto out_devices; + return -ENOTSUP; } + return 0; +} - nodesize = btrfs_super_nodesize(disk_super); - leafsize = btrfs_super_leafsize(disk_super); - sectorsize = btrfs_super_sectorsize(disk_super); - stripesize = btrfs_super_stripesize(disk_super); - tree_root->nodesize = nodesize; - tree_root->leafsize = leafsize; - tree_root->sectorsize = sectorsize; - tree_root->stripesize = stripesize; +static int find_best_backup_root(struct btrfs_super_block *super) +{ + struct btrfs_root_backup *backup; + u64 orig_gen = btrfs_super_generation(super); + u64 gen = 0; + int best_index = 0; + int i; - ret = btrfs_read_sys_array(tree_root); - if (ret) - goto out_devices; - blocksize = btrfs_level_size(tree_root, - btrfs_super_chunk_root_level(disk_super)); - generation = btrfs_super_chunk_root_generation(disk_super); + for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) { + backup = super->super_roots + i; + if (btrfs_backup_tree_root_gen(backup) != orig_gen && + btrfs_backup_tree_root_gen(backup) > gen) { + best_index = i; + gen = btrfs_backup_tree_root_gen(backup); + } + } + return best_index; +} - __setup_root(nodesize, leafsize, sectorsize, stripesize, - chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID); +static int setup_root_or_create_block(struct btrfs_fs_info *fs_info, + enum btrfs_open_ctree_flags flags, + struct btrfs_root *info_root, + u64 objectid, char *str) +{ + struct btrfs_super_block *sb = fs_info->super_copy; + struct btrfs_root *root = fs_info->tree_root; + u32 nodesize = btrfs_super_nodesize(sb); + int ret; - chunk_root->node = read_tree_block(chunk_root, - btrfs_super_chunk_root(disk_super), - blocksize, generation); - if (!extent_buffer_uptodate(chunk_root->node)) { - printk("Couldn't read chunk root\n"); - goto out_devices; + ret = find_and_setup_root(root, fs_info, objectid, info_root); + if (ret) { + printk("Couldn't setup %s tree\n", str); + if (!(flags & OPEN_CTREE_PARTIAL)) + return -EIO; + /* + * Need a blank node here just so we don't screw up in the + * million of places that assume a root has a valid ->node + */ + info_root->node = + btrfs_find_create_tree_block(fs_info, 0, nodesize); + if (!info_root->node) + return -ENOMEM; + clear_extent_buffer_uptodate(NULL, info_root->node); } - read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), - BTRFS_UUID_SIZE); + return 0; +} - if (!(btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_METADUMP)) { - ret = btrfs_read_chunk_tree(chunk_root); - if (ret) { - printk("Couldn't read chunk tree\n"); - goto out_chunk; - } - } +int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info, u64 root_tree_bytenr, + enum btrfs_open_ctree_flags flags) +{ + struct btrfs_super_block *sb = fs_info->super_copy; + struct btrfs_root *root; + struct btrfs_key key; + u32 sectorsize; + u32 nodesize; + u32 leafsize; + u32 stripesize; + u64 generation; + u32 blocksize; + int ret; - blocksize = btrfs_level_size(tree_root, - btrfs_super_root_level(disk_super)); - generation = btrfs_super_generation(disk_super); + nodesize = btrfs_super_nodesize(sb); + leafsize = btrfs_super_leafsize(sb); + sectorsize = btrfs_super_sectorsize(sb); + stripesize = btrfs_super_stripesize(sb); - if (!root_tree_bytenr) - root_tree_bytenr = btrfs_super_root(disk_super); - tree_root->node = read_tree_block(tree_root, - root_tree_bytenr, - blocksize, generation); - if (!extent_buffer_uptodate(tree_root->node)) { - printk("Couldn't read tree root\n"); - goto out_failed; + root = fs_info->tree_root; + __setup_root(nodesize, leafsize, sectorsize, stripesize, + root, fs_info, BTRFS_ROOT_TREE_OBJECTID); + blocksize = root->nodesize; + generation = btrfs_super_generation(sb); + + if (!root_tree_bytenr && !(flags & OPEN_CTREE_BACKUP_ROOT)) { + root_tree_bytenr = btrfs_super_root(sb); + } else if (flags & OPEN_CTREE_BACKUP_ROOT) { + struct btrfs_root_backup *backup; + int index = find_best_backup_root(sb); + if (index >= BTRFS_NUM_BACKUP_ROOTS) { + fprintf(stderr, "Invalid backup root number\n"); + return -EIO; + } + backup = fs_info->super_copy->super_roots + index; + root_tree_bytenr = btrfs_backup_tree_root(backup); + generation = btrfs_backup_tree_root_gen(backup); } - ret = find_and_setup_root(tree_root, fs_info, - BTRFS_EXTENT_TREE_OBJECTID, extent_root); - if (ret) { - printk("Couldn't setup extent tree\n"); - goto out_failed; + + root->node = read_tree_block(root, root_tree_bytenr, blocksize, + generation); + if (!extent_buffer_uptodate(root->node)) { + fprintf(stderr, "Couldn't read tree root\n"); + return -EIO; } - extent_root->track_dirty = 1; - ret = find_and_setup_root(tree_root, fs_info, - BTRFS_DEV_TREE_OBJECTID, dev_root); + ret = setup_root_or_create_block(fs_info, flags, fs_info->extent_root, + BTRFS_EXTENT_TREE_OBJECTID, "extent"); + if (ret) + return ret; + fs_info->extent_root->track_dirty = 1; + + ret = find_and_setup_root(root, fs_info, BTRFS_DEV_TREE_OBJECTID, + fs_info->dev_root); if (ret) { printk("Couldn't setup device tree\n"); - goto out_failed; + return -EIO; } - dev_root->track_dirty = 1; + fs_info->dev_root->track_dirty = 1; - ret = find_and_setup_root(tree_root, fs_info, - BTRFS_CSUM_TREE_OBJECTID, csum_root); - if (ret) { - printk("Couldn't setup csum tree\n"); - if (!partial) - goto out_failed; + ret = setup_root_or_create_block(fs_info, flags, fs_info->csum_root, + BTRFS_CSUM_TREE_OBJECTID, "csum"); + if (ret) + return ret; + fs_info->csum_root->track_dirty = 1; + + ret = find_and_setup_root(root, fs_info, BTRFS_QUOTA_TREE_OBJECTID, + fs_info->quota_root); + if (ret == 0) + fs_info->quota_enabled = 1; + + if (btrfs_fs_compat_ro(fs_info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) { + ret = find_and_setup_root(root, fs_info, BTRFS_FREE_SPACE_TREE_OBJECTID, + fs_info->free_space_root); + if (ret) { + printk("Couldn't read free space tree\n"); + return -EIO; + } + fs_info->free_space_root->track_dirty = 1; } - csum_root->track_dirty = 1; - find_and_setup_log_root(tree_root, fs_info, disk_super); + ret = find_and_setup_log_root(root, fs_info, sb); + if (ret) { + printk("Couldn't setup log root tree\n"); + if (!(flags & OPEN_CTREE_PARTIAL)) + return -EIO; + } fs_info->generation = generation; fs_info->last_trans_committed = generation; - btrfs_read_block_groups(fs_info->tree_root); + if (extent_buffer_uptodate(fs_info->extent_root->node) && + !(flags & OPEN_CTREE_NO_BLOCK_GROUPS)) + btrfs_read_block_groups(fs_info->tree_root); key.objectid = BTRFS_FS_TREE_OBJECTID; key.type = BTRFS_ROOT_ITEM_KEY; key.offset = (u64)-1; fs_info->fs_root = btrfs_read_fs_root(fs_info, &key); - if (!fs_info->fs_root) - goto out_failed; - - fs_info->data_alloc_profile = (u64)-1; - fs_info->metadata_alloc_profile = (u64)-1; - fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; - - return fs_info; - -out_failed: - if (partial) - return fs_info; + if (IS_ERR(fs_info->fs_root)) + return -EIO; + return 0; +} +void btrfs_release_all_roots(struct btrfs_fs_info *fs_info) +{ + if (fs_info->free_space_root) + free_extent_buffer(fs_info->free_space_root->node); + if (fs_info->quota_root) + free_extent_buffer(fs_info->quota_root->node); if (fs_info->csum_root) free_extent_buffer(fs_info->csum_root->node); if (fs_info->dev_root) @@ -1022,111 +1078,512 @@ out_failed: free_extent_buffer(fs_info->extent_root->node); if (fs_info->tree_root) free_extent_buffer(fs_info->tree_root->node); -out_chunk: + if (fs_info->log_root_tree) + free_extent_buffer(fs_info->log_root_tree->node); if (fs_info->chunk_root) free_extent_buffer(fs_info->chunk_root->node); -out_devices: - btrfs_close_devices(fs_info->fs_devices); -out_cleanup: +} + +static void free_map_lookup(struct cache_extent *ce) +{ + struct map_lookup *map; + + map = container_of(ce, struct map_lookup, ce); + kfree(map); +} + +FREE_EXTENT_CACHE_BASED_TREE(mapping_cache, free_map_lookup); + +void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info) +{ + while (!list_empty(&fs_info->recow_ebs)) { + struct extent_buffer *eb; + eb = list_first_entry(&fs_info->recow_ebs, + struct extent_buffer, recow); + list_del_init(&eb->recow); + free_extent_buffer(eb); + } + free_mapping_cache_tree(&fs_info->mapping_tree.cache_tree); extent_io_tree_cleanup(&fs_info->extent_cache); extent_io_tree_cleanup(&fs_info->free_space_cache); extent_io_tree_cleanup(&fs_info->block_group_cache); extent_io_tree_cleanup(&fs_info->pinned_extents); extent_io_tree_cleanup(&fs_info->pending_del); extent_io_tree_cleanup(&fs_info->extent_ins); +} + +int btrfs_scan_fs_devices(int fd, const char *path, + struct btrfs_fs_devices **fs_devices, + u64 sb_bytenr, int super_recover, + int skip_devices) +{ + u64 total_devs; + u64 dev_size; + off_t seek_ret; + int ret; + if (!sb_bytenr) + sb_bytenr = BTRFS_SUPER_INFO_OFFSET; + + seek_ret = lseek(fd, 0, SEEK_END); + if (seek_ret < 0) + return -errno; + + dev_size = seek_ret; + lseek(fd, 0, SEEK_SET); + if (sb_bytenr > dev_size) { + fprintf(stderr, "Superblock bytenr is larger than device size\n"); + return -EINVAL; + } + + ret = btrfs_scan_one_device(fd, path, fs_devices, + &total_devs, sb_bytenr, super_recover); + if (ret) { + fprintf(stderr, "No valid Btrfs found on %s\n", path); + return ret; + } + + if (!skip_devices && total_devs != 1) { + ret = btrfs_scan_lblkid(); + if (ret) + return ret; + } + return 0; +} + +int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info, + u64 chunk_root_bytenr) +{ + struct btrfs_super_block *sb = fs_info->super_copy; + u32 sectorsize; + u32 nodesize; + u32 leafsize; + u32 blocksize; + u32 stripesize; + u64 generation; + int ret; + + nodesize = btrfs_super_nodesize(sb); + leafsize = btrfs_super_leafsize(sb); + sectorsize = btrfs_super_sectorsize(sb); + stripesize = btrfs_super_stripesize(sb); + + __setup_root(nodesize, leafsize, sectorsize, stripesize, + fs_info->chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID); + + ret = btrfs_read_sys_array(fs_info->chunk_root); + if (ret) + return ret; + + blocksize = fs_info->chunk_root->nodesize; + generation = btrfs_super_chunk_root_generation(sb); + + if (chunk_root_bytenr && !IS_ALIGNED(chunk_root_bytenr, + btrfs_super_sectorsize(sb))) { + warning("chunk_root_bytenr %llu is unaligned to %u, ignore it", + chunk_root_bytenr, btrfs_super_sectorsize(sb)); + chunk_root_bytenr = 0; + } + + if (!chunk_root_bytenr) + chunk_root_bytenr = btrfs_super_chunk_root(sb); + else + generation = 0; + + fs_info->chunk_root->node = read_tree_block(fs_info->chunk_root, + chunk_root_bytenr, + blocksize, generation); + if (!extent_buffer_uptodate(fs_info->chunk_root->node)) { + if (fs_info->ignore_chunk_tree_error) { + warning("cannot read chunk root, continue anyway"); + fs_info->chunk_root = NULL; + return 0; + } else { + error("cannot read chunk root"); + return -EIO; + } + } + + if (!(btrfs_super_flags(sb) & BTRFS_SUPER_FLAG_METADUMP)) { + ret = btrfs_read_chunk_tree(fs_info->chunk_root); + if (ret) { + fprintf(stderr, "Couldn't read chunk tree\n"); + return ret; + } + } + return 0; +} + +static struct btrfs_fs_info *__open_ctree_fd(int fp, const char *path, + u64 sb_bytenr, + u64 root_tree_bytenr, + u64 chunk_root_bytenr, + enum btrfs_open_ctree_flags flags) +{ + struct btrfs_fs_info *fs_info; + struct btrfs_super_block *disk_super; + struct btrfs_fs_devices *fs_devices = NULL; + struct extent_buffer *eb; + int ret; + int oflags; + + if (sb_bytenr == 0) + sb_bytenr = BTRFS_SUPER_INFO_OFFSET; + + /* try to drop all the caches */ + if (posix_fadvise(fp, 0, 0, POSIX_FADV_DONTNEED)) + fprintf(stderr, "Warning, could not drop caches\n"); + + fs_info = btrfs_new_fs_info(flags & OPEN_CTREE_WRITES, sb_bytenr); + if (!fs_info) { + fprintf(stderr, "Failed to allocate memory for fs_info\n"); + return NULL; + } + if (flags & OPEN_CTREE_RESTORE) + fs_info->on_restoring = 1; + if (flags & OPEN_CTREE_SUPPRESS_CHECK_BLOCK_ERRORS) + fs_info->suppress_check_block_errors = 1; + if (flags & OPEN_CTREE_IGNORE_FSID_MISMATCH) + fs_info->ignore_fsid_mismatch = 1; + if (flags & OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR) + fs_info->ignore_chunk_tree_error = 1; + + ret = btrfs_scan_fs_devices(fp, path, &fs_devices, sb_bytenr, + (flags & OPEN_CTREE_RECOVER_SUPER), + (flags & OPEN_CTREE_NO_DEVICES)); + if (ret) + goto out; + + fs_info->fs_devices = fs_devices; + if (flags & OPEN_CTREE_WRITES) + oflags = O_RDWR; + else + oflags = O_RDONLY; + + if (flags & OPEN_CTREE_EXCLUSIVE) + oflags |= O_EXCL; + + ret = btrfs_open_devices(fs_devices, oflags); + if (ret) + goto out; + + disk_super = fs_info->super_copy; + if (flags & OPEN_CTREE_RECOVER_SUPER) + ret = btrfs_read_dev_super(fs_devices->latest_bdev, + disk_super, sb_bytenr, 1); + else + ret = btrfs_read_dev_super(fp, disk_super, sb_bytenr, 0); + if (ret) { + printk("No valid btrfs found\n"); + goto out_devices; + } + + if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_CHANGING_FSID && + !fs_info->ignore_fsid_mismatch) { + fprintf(stderr, "ERROR: Filesystem UUID change in progress\n"); + goto out_devices; + } + + memcpy(fs_info->fsid, &disk_super->fsid, BTRFS_FSID_SIZE); + + ret = btrfs_check_fs_compatibility(fs_info->super_copy, + flags & OPEN_CTREE_WRITES); + if (ret) + goto out_devices; + + ret = btrfs_setup_chunk_tree_and_device_map(fs_info, chunk_root_bytenr); + if (ret) + goto out_chunk; + + /* Chunk tree root is unable to read, return directly */ + if (!fs_info->chunk_root) + return fs_info; + + eb = fs_info->chunk_root->node; + read_extent_buffer(eb, fs_info->chunk_tree_uuid, + btrfs_header_chunk_tree_uuid(eb), + BTRFS_UUID_SIZE); + + ret = btrfs_setup_all_roots(fs_info, root_tree_bytenr, flags); + if (ret && !(flags & __OPEN_CTREE_RETURN_CHUNK_ROOT) && + !fs_info->ignore_chunk_tree_error) + goto out_chunk; + + return fs_info; + +out_chunk: + btrfs_release_all_roots(fs_info); + btrfs_cleanup_all_caches(fs_info); +out_devices: + btrfs_close_devices(fs_devices); out: - free(tree_root); - free(extent_root); - free(chunk_root); - free(dev_root); - free(csum_root); - free(fs_info); + btrfs_free_fs_info(fs_info); return NULL; } struct btrfs_fs_info *open_ctree_fs_info(const char *filename, u64 sb_bytenr, u64 root_tree_bytenr, - int writes, int partial) + u64 chunk_root_bytenr, + enum btrfs_open_ctree_flags flags) { int fp; + int ret; struct btrfs_fs_info *info; - int flags = O_CREAT | O_RDWR; + int oflags = O_RDWR; + struct stat st; + + ret = stat(filename, &st); + if (ret < 0) { + error("cannot stat '%s': %s", filename, strerror(errno)); + return NULL; + } + if (!(((st.st_mode & S_IFMT) == S_IFREG) || ((st.st_mode & S_IFMT) == S_IFBLK))) { + error("not a regular file or block device: %s", filename); + return NULL; + } - if (!writes) - flags = O_RDONLY; + if (!(flags & OPEN_CTREE_WRITES)) + oflags = O_RDONLY; - fp = open(filename, flags, 0600); + fp = open(filename, oflags); if (fp < 0) { - fprintf (stderr, "Could not open %s\n", filename); + error("cannot open '%s': %s", filename, strerror(errno)); return NULL; } info = __open_ctree_fd(fp, filename, sb_bytenr, root_tree_bytenr, - writes, partial); + chunk_root_bytenr, flags); close(fp); return info; } -struct btrfs_root *open_ctree(const char *filename, u64 sb_bytenr, int writes) +struct btrfs_root *open_ctree(const char *filename, u64 sb_bytenr, + enum btrfs_open_ctree_flags flags) { struct btrfs_fs_info *info; - info = open_ctree_fs_info(filename, sb_bytenr, 0, writes, 0); + /* This flags may not return fs_info with any valid root */ + BUG_ON(flags & OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR); + info = open_ctree_fs_info(filename, sb_bytenr, 0, 0, flags); if (!info) return NULL; + if (flags & __OPEN_CTREE_RETURN_CHUNK_ROOT) + return info->chunk_root; return info->fs_root; } struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr, - int writes) + enum btrfs_open_ctree_flags flags) { struct btrfs_fs_info *info; - info = __open_ctree_fd(fp, path, sb_bytenr, 0, writes, 0); + + /* This flags may not return fs_info with any valid root */ + BUG_ON(flags & OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR); + info = __open_ctree_fd(fp, path, sb_bytenr, 0, 0, flags); if (!info) return NULL; + if (flags & __OPEN_CTREE_RETURN_CHUNK_ROOT) + return info->chunk_root; return info->fs_root; } -int btrfs_read_dev_super(int fd, struct btrfs_super_block *sb, u64 sb_bytenr) +/* + * Check if the super is valid: + * - nodesize/sectorsize - minimum, maximum, alignment + * - tree block starts - alignment + * - number of devices - something sane + * - sys array size - maximum + */ +static int check_super(struct btrfs_super_block *sb) +{ + char result[BTRFS_CSUM_SIZE]; + u32 crc; + u16 csum_type; + int csum_size; + + if (btrfs_super_magic(sb) != BTRFS_MAGIC) { + error("superblock magic doesn't match"); + return -EIO; + } + + csum_type = btrfs_super_csum_type(sb); + if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) { + error("unsupported checksum algorithm %u\n", csum_type); + return -EIO; + } + csum_size = btrfs_csum_sizes[csum_type]; + + crc = ~(u32)0; + crc = btrfs_csum_data(NULL, (char *)sb + BTRFS_CSUM_SIZE, crc, + BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); + btrfs_csum_final(crc, result); + + if (memcmp(result, sb->csum, csum_size)) { + error("superblock checksum mismatch"); + return -EIO; + } + if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) { + error("tree_root level too big: %d >= %d", + btrfs_super_root_level(sb), BTRFS_MAX_LEVEL); + goto error_out; + } + if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) { + error("chunk_root level too big: %d >= %d", + btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL); + goto error_out; + } + if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) { + error("log_root level too big: %d >= %d", + btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL); + goto error_out; + } + + if (!IS_ALIGNED(btrfs_super_root(sb), 4096)) { + error("tree_root block unaligned: %llu", btrfs_super_root(sb)); + goto error_out; + } + if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096)) { + error("chunk_root block unaligned: %llu", + btrfs_super_chunk_root(sb)); + goto error_out; + } + if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096)) { + error("log_root block unaligned: %llu", + btrfs_super_log_root(sb)); + goto error_out; + } + if (btrfs_super_nodesize(sb) < 4096) { + error("nodesize too small: %u < 4096", + btrfs_super_nodesize(sb)); + goto error_out; + } + if (!IS_ALIGNED(btrfs_super_nodesize(sb), 4096)) { + error("nodesize unaligned: %u", btrfs_super_nodesize(sb)); + goto error_out; + } + if (btrfs_super_sectorsize(sb) < 4096) { + error("sectorsize too small: %u < 4096", + btrfs_super_sectorsize(sb)); + goto error_out; + } + if (!IS_ALIGNED(btrfs_super_sectorsize(sb), 4096)) { + error("sectorsize unaligned: %u", btrfs_super_sectorsize(sb)); + goto error_out; + } + if (btrfs_super_total_bytes(sb) == 0) { + error("invalid total_bytes 0"); + goto error_out; + } + if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) { + error("invalid bytes_used %llu", btrfs_super_bytes_used(sb)); + goto error_out; + } + if ((btrfs_super_stripesize(sb) != 4096) + && (btrfs_super_stripesize(sb) != btrfs_super_sectorsize(sb))) { + error("invalid stripesize %u", btrfs_super_stripesize(sb)); + goto error_out; + } + + if (memcmp(sb->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) { + char fsid[BTRFS_UUID_UNPARSED_SIZE]; + char dev_fsid[BTRFS_UUID_UNPARSED_SIZE]; + + uuid_unparse(sb->fsid, fsid); + uuid_unparse(sb->dev_item.fsid, dev_fsid); + error("dev_item UUID does not match fsid: %s != %s", + dev_fsid, fsid); + goto error_out; + } + + /* + * Hint to catch really bogus numbers, bitflips or so + */ + if (btrfs_super_num_devices(sb) > (1UL << 31)) { + warning("suspicious number of devices: %llu", + btrfs_super_num_devices(sb)); + } + + if (btrfs_super_num_devices(sb) == 0) { + error("number of devices is 0"); + goto error_out; + } + + /* + * Obvious sys_chunk_array corruptions, it must hold at least one key + * and one chunk + */ + if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) { + error("system chunk array too big %u > %u", + btrfs_super_sys_array_size(sb), + BTRFS_SYSTEM_CHUNK_ARRAY_SIZE); + goto error_out; + } + if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key) + + sizeof(struct btrfs_chunk)) { + error("system chunk array too small %u < %lu", + btrfs_super_sys_array_size(sb), + sizeof(struct btrfs_disk_key) + + sizeof(struct btrfs_chunk)); + goto error_out; + } + + return 0; + +error_out: + error("superblock checksum matches but it has invalid members"); + return -EIO; +} + +int btrfs_read_dev_super(int fd, struct btrfs_super_block *sb, u64 sb_bytenr, + int super_recover) { u8 fsid[BTRFS_FSID_SIZE]; int fsid_is_initialized = 0; - struct btrfs_super_block buf; + char tmp[BTRFS_SUPER_INFO_SIZE]; + struct btrfs_super_block *buf = (struct btrfs_super_block *)tmp; int i; int ret; + int max_super = super_recover ? BTRFS_SUPER_MIRROR_MAX : 1; u64 transid = 0; u64 bytenr; if (sb_bytenr != BTRFS_SUPER_INFO_OFFSET) { - ret = pread64(fd, &buf, sizeof(buf), sb_bytenr); - if (ret < sizeof(buf)) + ret = pread64(fd, buf, BTRFS_SUPER_INFO_SIZE, sb_bytenr); + if (ret < BTRFS_SUPER_INFO_SIZE) return -1; - if (btrfs_super_bytenr(&buf) != sb_bytenr || - buf.magic != cpu_to_le64(BTRFS_MAGIC)) + if (btrfs_super_bytenr(buf) != sb_bytenr) return -1; - memcpy(sb, &buf, sizeof(*sb)); + if (check_super(buf)) + return -1; + memcpy(sb, buf, BTRFS_SUPER_INFO_SIZE); return 0; } - for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { + /* + * we would like to check all the supers, but that would make + * a btrfs mount succeed after a mkfs from a different FS. + * So, we need to add a special mount option to scan for + * later supers, using BTRFS_SUPER_MIRROR_MAX instead + */ + + for (i = 0; i < max_super; i++) { bytenr = btrfs_sb_offset(i); - ret = pread64(fd, &buf, sizeof(buf), bytenr); - if (ret < sizeof(buf)) + ret = pread64(fd, buf, BTRFS_SUPER_INFO_SIZE, bytenr); + if (ret < BTRFS_SUPER_INFO_SIZE) break; - if (btrfs_super_bytenr(&buf) != bytenr ) + if (btrfs_super_bytenr(buf) != bytenr ) continue; /* if magic is NULL, the device was removed */ - if (buf.magic == 0 && i == 0) - return -1; - if (buf.magic != cpu_to_le64(BTRFS_MAGIC)) + if (btrfs_super_magic(buf) == 0 && i == 0) + break; + if (check_super(buf)) continue; if (!fsid_is_initialized) { - memcpy(fsid, buf.fsid, sizeof(fsid)); + memcpy(fsid, buf->fsid, sizeof(fsid)); fsid_is_initialized = 1; - } else if (memcmp(fsid, buf.fsid, sizeof(fsid))) { + } else if (memcmp(fsid, buf->fsid, sizeof(fsid))) { /* * the superblocks (the original one and * its backups) contain data of different @@ -1135,17 +1592,18 @@ int btrfs_read_dev_super(int fd, struct btrfs_super_block *sb, u64 sb_bytenr) continue; } - if (btrfs_super_generation(&buf) > transid) { - memcpy(sb, &buf, sizeof(*sb)); - transid = btrfs_super_generation(&buf); + if (btrfs_super_generation(buf) > transid) { + memcpy(sb, buf, BTRFS_SUPER_INFO_SIZE); + transid = btrfs_super_generation(buf); } } return transid > 0 ? 0 : -1; } -int write_dev_supers(struct btrfs_root *root, struct btrfs_super_block *sb, - struct btrfs_device *device) +static int write_dev_supers(struct btrfs_root *root, + struct btrfs_super_block *sb, + struct btrfs_device *device) { u64 bytenr; u32 crc; @@ -1165,7 +1623,8 @@ int write_dev_supers(struct btrfs_root *root, struct btrfs_super_block *sb, ret = pwrite64(device->fd, root->fs_info->super_copy, BTRFS_SUPER_INFO_SIZE, root->fs_info->super_bytenr); - BUG_ON(ret != BTRFS_SUPER_INFO_SIZE); + if (ret != BTRFS_SUPER_INFO_SIZE) + goto write_err; return 0; } @@ -1187,10 +1646,19 @@ int write_dev_supers(struct btrfs_root *root, struct btrfs_super_block *sb, */ ret = pwrite64(device->fd, root->fs_info->super_copy, BTRFS_SUPER_INFO_SIZE, bytenr); - BUG_ON(ret != BTRFS_SUPER_INFO_SIZE); + if (ret != BTRFS_SUPER_INFO_SIZE) + goto write_err; } return 0; + +write_err: + if (ret > 0) + fprintf(stderr, "WARNING: failed to write all sb data\n"); + else + fprintf(stderr, "WARNING: failed to write sb: %s\n", + strerror(errno)); + return ret; } int write_all_supers(struct btrfs_root *root) @@ -1259,27 +1727,15 @@ int write_ctree_super(struct btrfs_trans_handle *trans, return ret; } -static void free_mapping_cache(struct btrfs_fs_info *fs_info) -{ - struct cache_tree *cache_tree = &fs_info->mapping_tree.cache_tree; - struct cache_extent *ce; - struct map_lookup *map; - - while ((ce = find_first_cache_extent(cache_tree, 0))) { - map = container_of(ce, struct map_lookup, ce); - remove_cache_extent(cache_tree, ce); - kfree(map); - } -} - -int close_ctree(struct btrfs_root *root) +int close_ctree_fs_info(struct btrfs_fs_info *fs_info) { int ret; struct btrfs_trans_handle *trans; - struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_root *root = fs_info->tree_root; if (fs_info->last_trans_committed != fs_info->generation) { + BUG_ON(!root); trans = btrfs_start_transaction(root, 1); btrfs_commit_transaction(trans, root); trans = btrfs_start_transaction(root, 1); @@ -1292,41 +1748,12 @@ int close_ctree(struct btrfs_root *root) } btrfs_free_block_groups(fs_info); - free_fs_roots(fs_info); - - if (fs_info->extent_root->node) - free_extent_buffer(fs_info->extent_root->node); - if (fs_info->tree_root->node) - free_extent_buffer(fs_info->tree_root->node); - if (fs_info->chunk_root->node) - free_extent_buffer(fs_info->chunk_root->node); - if (fs_info->dev_root->node) - free_extent_buffer(fs_info->dev_root->node); - if (fs_info->csum_root->node) - free_extent_buffer(fs_info->csum_root->node); - - if (fs_info->log_root_tree) { - if (fs_info->log_root_tree->node) - free_extent_buffer(fs_info->log_root_tree->node); - free(fs_info->log_root_tree); - } + free_fs_roots_tree(&fs_info->fs_root_tree); + btrfs_release_all_roots(fs_info); btrfs_close_devices(fs_info->fs_devices); - free_mapping_cache(fs_info); - extent_io_tree_cleanup(&fs_info->extent_cache); - extent_io_tree_cleanup(&fs_info->free_space_cache); - extent_io_tree_cleanup(&fs_info->block_group_cache); - extent_io_tree_cleanup(&fs_info->pinned_extents); - extent_io_tree_cleanup(&fs_info->pending_del); - extent_io_tree_cleanup(&fs_info->extent_ins); - - free(fs_info->tree_root); - free(fs_info->extent_root); - free(fs_info->chunk_root); - free(fs_info->dev_root); - free(fs_info->csum_root); - free(fs_info); - + btrfs_cleanup_all_caches(fs_info); + btrfs_free_fs_info(fs_info); return 0; }