X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=disk-io.c;h=76958aef239e511c21e072c757c0f5e2e23aea59;hb=e02049d964e77d22348bf940a7027a1aff558f18;hp=0aec56e0df28372e5f61e8e380a08fd2543deafe;hpb=34a5ec12eedbd13f47c92108e4fb27e08598219a;p=platform%2Fupstream%2Fbtrfs-progs.git diff --git a/disk-io.c b/disk-io.c index 0aec56e..76958ae 100644 --- a/disk-io.c +++ b/disk-io.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "kerncompat.h" #include "radix-tree.h" #include "ctree.h" @@ -33,21 +34,48 @@ #include "print-tree.h" #include "rbtree-utils.h" -static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) +/* specified errno for check_tree_block */ +#define BTRFS_BAD_BYTENR (-1) +#define BTRFS_BAD_FSID (-2) +#define BTRFS_BAD_LEVEL (-3) +#define BTRFS_BAD_NRITEMS (-4) + +/* Calculate max possible nritems for a leaf/node */ +static u32 max_nritems(u8 level, u32 nodesize) { - struct btrfs_fs_devices *fs_devices; - int ret = 1; + if (level == 0) + return ((nodesize - sizeof(struct btrfs_header)) / + sizeof(struct btrfs_item)); + return ((nodesize - sizeof(struct btrfs_header)) / + sizeof(struct btrfs_key_ptr)); +} - if (buf->start != btrfs_header_bytenr(buf)) { - printk("Check tree block failed, want=%Lu, have=%Lu\n", - buf->start, btrfs_header_bytenr(buf)); - return ret; - } +static int check_tree_block(struct btrfs_fs_info *fs_info, + struct extent_buffer *buf) +{ - fs_devices = root->fs_info->fs_devices; + struct btrfs_fs_devices *fs_devices; + u32 nodesize = fs_info->nodesize; + int ret = BTRFS_BAD_FSID; + + if (buf->start != btrfs_header_bytenr(buf)) + return BTRFS_BAD_BYTENR; + if (btrfs_header_level(buf) >= BTRFS_MAX_LEVEL) + return BTRFS_BAD_LEVEL; + if (btrfs_header_nritems(buf) > max_nritems(btrfs_header_level(buf), + nodesize)) + return BTRFS_BAD_NRITEMS; + + /* Only leaf can be empty */ + if (btrfs_header_nritems(buf) == 0 && + btrfs_header_level(buf) != 0) + return BTRFS_BAD_NRITEMS; + + fs_devices = fs_info->fs_devices; while (fs_devices) { - if (!memcmp_extent_buffer(buf, fs_devices->fsid, + if (fs_info->ignore_fsid_mismatch || + !memcmp_extent_buffer(buf, fs_devices->fsid, btrfs_header_fsid(), BTRFS_FSID_SIZE)) { ret = 0; @@ -58,27 +86,55 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) return ret; } -u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) +static void print_tree_block_error(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, + int err) +{ + char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'}; + char found_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'}; + u8 buf[BTRFS_UUID_SIZE]; + + switch (err) { + case BTRFS_BAD_FSID: + read_extent_buffer(eb, buf, btrfs_header_fsid(), + BTRFS_UUID_SIZE); + uuid_unparse(buf, found_uuid); + uuid_unparse(fs_info->fsid, fs_uuid); + fprintf(stderr, "fsid mismatch, want=%s, have=%s\n", + fs_uuid, found_uuid); + break; + case BTRFS_BAD_BYTENR: + fprintf(stderr, "bytenr mismatch, want=%llu, have=%llu\n", + eb->start, btrfs_header_bytenr(eb)); + break; + case BTRFS_BAD_LEVEL: + fprintf(stderr, "bad level, %u > %u\n", + btrfs_header_level(eb), BTRFS_MAX_LEVEL); + break; + case BTRFS_BAD_NRITEMS: + fprintf(stderr, "invalid nr_items: %u\n", + btrfs_header_nritems(eb)); + break; + } +} + +u32 btrfs_csum_data(char *data, u32 seed, size_t len) { return crc32c(seed, data, len); } -void btrfs_csum_final(u32 crc, char *result) +void btrfs_csum_final(u32 crc, u8 *result) { - *(__le32 *)result = ~cpu_to_le32(crc); + put_unaligned_le32(~crc, result); } static int __csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, int verify, int silent) { - char *result; + u8 result[BTRFS_CSUM_SIZE]; u32 len; u32 crc = ~(u32)0; - result = malloc(csum_size * sizeof(char)); - if (!result) - return 1; - len = buf->len - BTRFS_CSUM_SIZE; crc = crc32c(crc, buf->data + BTRFS_CSUM_SIZE, len); btrfs_csum_final(crc, result); @@ -90,13 +146,11 @@ static int __csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, (unsigned long long)buf->start, *((u32 *)result), *((u32*)(char *)buf->data)); - free(result); return 1; } } else { write_extent_buffer(buf, result, 0, csum_size); } - free(result); return 0; } @@ -110,44 +164,46 @@ int verify_tree_block_csum_silent(struct extent_buffer *buf, u16 csum_size) return __csum_tree_block_size(buf, csum_size, 1, 1); } -int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, - int verify) +int csum_tree_block(struct btrfs_fs_info *fs_info, + struct extent_buffer *buf, int verify) { u16 csum_size = - btrfs_super_csum_size(root->fs_info->super_copy); + btrfs_super_csum_size(fs_info->super_copy); + if (verify && fs_info->suppress_check_block_errors) + return verify_tree_block_csum_silent(buf, csum_size); return csum_tree_block_size(buf, csum_size, verify); } -struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, +struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, u32 blocksize) { - return find_extent_buffer(&root->fs_info->extent_cache, + return find_extent_buffer(&fs_info->extent_cache, bytenr, blocksize); } -struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, - u64 bytenr, u32 blocksize) +struct extent_buffer* btrfs_find_create_tree_block( + struct btrfs_fs_info *fs_info, u64 bytenr) { - return alloc_extent_buffer(&root->fs_info->extent_cache, bytenr, - blocksize); + return alloc_extent_buffer(&fs_info->extent_cache, bytenr, + fs_info->nodesize); } -void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, - u64 parent_transid) +void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, + u64 parent_transid) { struct extent_buffer *eb; u64 length; struct btrfs_multi_bio *multi = NULL; struct btrfs_device *device; - eb = btrfs_find_tree_block(root, bytenr, blocksize); + eb = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize); if (!(eb && btrfs_buffer_uptodate(eb, parent_transid)) && - !btrfs_map_block(&root->fs_info->mapping_tree, READ, - bytenr, &length, &multi, 0, NULL)) { + !btrfs_map_block(fs_info, READ, bytenr, &length, &multi, 0, + NULL)) { device = multi->stripes[0].dev; device->total_ios++; - blocksize = min(blocksize, (u32)(64 * 1024)); - readahead(device->fd, multi->stripes[0].physical, blocksize); + readahead(device->fd, multi->stripes[0].physical, + fs_info->nodesize); } free_extent_buffer(eb); @@ -180,7 +236,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, ret = 1; out: - clear_extent_buffer_uptodate(io_tree, eb); + clear_extent_buffer_uptodate(eb); return ret; } @@ -201,9 +257,8 @@ int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirr if (!info->on_restoring && eb->start != BTRFS_SUPER_INFO_OFFSET) { - ret = btrfs_map_block(&info->mapping_tree, READ, - eb->start + offset, &read_len, &multi, - mirror, NULL); + ret = btrfs_map_block(info, READ, eb->start + offset, + &read_len, &multi, mirror, NULL); if (ret) { printk("Couldn't map the block %Lu\n", eb->start + offset); kfree(multi); @@ -211,7 +266,7 @@ int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirr } device = multi->stripes[0].dev; - if (device->fd == 0) { + if (device->fd <= 0) { kfree(multi); return -EIO; } @@ -245,18 +300,31 @@ int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirr return 0; } -struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, - u32 blocksize, u64 parent_transid) +struct extent_buffer* read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, + u64 parent_transid) { int ret; struct extent_buffer *eb; u64 best_transid = 0; + u32 sectorsize = fs_info->sectorsize; int mirror_num = 0; int good_mirror = 0; int num_copies; int ignore = 0; - eb = btrfs_find_create_tree_block(root, bytenr, blocksize); + /* + * Don't even try to create tree block for unaligned tree block + * bytenr. + * Such unaligned tree block will free overlapping extent buffer, + * causing use-after-free bugs for fuzzed images. + */ + if (bytenr < sectorsize || !IS_ALIGNED(bytenr, sectorsize)) { + error("tree block bytenr %llu is not aligned to sectorsize %u", + bytenr, sectorsize); + return ERR_PTR(-EIO); + } + + eb = btrfs_find_create_tree_block(fs_info, bytenr); if (!eb) return ERR_PTR(-ENOMEM); @@ -264,30 +332,33 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, return eb; while (1) { - ret = read_whole_eb(root->fs_info, eb, mirror_num); - if (ret == 0 && check_tree_block(root, eb) == 0 && - csum_tree_block(root, eb, 1) == 0 && + ret = read_whole_eb(fs_info, eb, mirror_num); + if (ret == 0 && csum_tree_block(fs_info, eb, 1) == 0 && + check_tree_block(fs_info, eb) == 0 && verify_parent_transid(eb->tree, eb, parent_transid, ignore) == 0) { if (eb->flags & EXTENT_BAD_TRANSID && list_empty(&eb->recow)) { list_add_tail(&eb->recow, - &root->fs_info->recow_ebs); + &fs_info->recow_ebs); eb->refs++; } btrfs_set_buffer_uptodate(eb); return eb; } if (ignore) { - if (check_tree_block(root, eb)) - printk("read block failed check_tree_block\n"); - else - printk("Csum didn't match\n"); + if (check_tree_block(fs_info, eb)) { + if (!fs_info->suppress_check_block_errors) + print_tree_block_error(fs_info, eb, + check_tree_block(fs_info, eb)); + } else { + if (!fs_info->suppress_check_block_errors) + fprintf(stderr, "Csum didn't match\n"); + } ret = -EIO; break; } - num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, - eb->start, eb->len); + num_copies = btrfs_num_copies(fs_info, eb->start, eb->len); if (num_copies == 1) { ignore = 1; continue; @@ -307,9 +378,40 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, return ERR_PTR(ret); } -int write_and_map_eb(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct extent_buffer *eb) +int read_extent_data(struct btrfs_fs_info *fs_info, char *data, u64 logical, + u64 *len, int mirror) +{ + u64 offset = 0; + struct btrfs_multi_bio *multi = NULL; + struct btrfs_device *device; + int ret = 0; + u64 max_len = *len; + + ret = btrfs_map_block(fs_info, READ, logical, len, &multi, mirror, + NULL); + if (ret) { + fprintf(stderr, "Couldn't map the block %llu\n", + logical + offset); + goto err; + } + device = multi->stripes[0].dev; + + if (device->fd <= 0) + goto err; + if (*len > max_len) + *len = max_len; + + ret = pread64(device->fd, data, *len, multi->stripes[0].physical); + if (ret != *len) + ret = -EIO; + else + ret = 0; +err: + kfree(multi); + return ret; +} + +int write_and_map_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) { int ret; int dev_nr; @@ -319,11 +421,11 @@ int write_and_map_eb(struct btrfs_trans_handle *trans, dev_nr = 0; length = eb->len; - ret = btrfs_map_block(&root->fs_info->mapping_tree, WRITE, - eb->start, &length, &multi, 0, &raid_map); + ret = btrfs_map_block(fs_info, WRITE, eb->start, &length, + &multi, 0, &raid_map); if (raid_map) { - ret = write_raid56_with_parity(root->fs_info, eb, multi, + ret = write_raid56_with_parity(fs_info, eb, multi, length, raid_map); BUG_ON(ret); } else while (dev_nr < multi->num_stripes) { @@ -335,43 +437,41 @@ int write_and_map_eb(struct btrfs_trans_handle *trans, ret = write_extent_to_disk(eb); BUG_ON(ret); } + kfree(raid_map); kfree(multi); return 0; } -static int write_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +int write_tree_block(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, struct extent_buffer *eb) { - if (check_tree_block(root, eb)) + if (check_tree_block(fs_info, eb)) { + print_tree_block_error(fs_info, eb, + check_tree_block(fs_info, eb)); BUG(); + } - if (!btrfs_buffer_uptodate(eb, trans->transid)) + if (trans && !btrfs_buffer_uptodate(eb, trans->transid)) BUG(); btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); - csum_tree_block(root, eb, 0); + csum_tree_block(fs_info, eb, 0); - return write_and_map_eb(trans, root, eb); + return write_and_map_eb(fs_info, eb); } -int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, - u32 stripesize, struct btrfs_root *root, - struct btrfs_fs_info *fs_info, u64 objectid) +void btrfs_setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, + u64 objectid) { root->node = NULL; root->commit_root = NULL; - root->sectorsize = sectorsize; - root->nodesize = nodesize; - root->leafsize = leafsize; - root->stripesize = stripesize; root->ref_cows = 0; root->track_dirty = 0; root->fs_info = fs_info; root->objectid = objectid; root->last_trans = 0; - root->highest_inode = 0; root->last_inode_alloc = 0; INIT_LIST_HEAD(&root->dirty_list); @@ -379,127 +479,6 @@ int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); root->root_key.objectid = objectid; - return 0; -} - -static int update_cowonly_root(struct btrfs_trans_handle *trans, - struct btrfs_root *root) -{ - int ret; - u64 old_root_bytenr; - struct btrfs_root *tree_root = root->fs_info->tree_root; - - btrfs_write_dirty_block_groups(trans, root); - while(1) { - old_root_bytenr = btrfs_root_bytenr(&root->root_item); - if (old_root_bytenr == root->node->start) - break; - btrfs_set_root_bytenr(&root->root_item, - root->node->start); - btrfs_set_root_generation(&root->root_item, - trans->transid); - root->root_item.level = btrfs_header_level(root->node); - ret = btrfs_update_root(trans, tree_root, - &root->root_key, - &root->root_item); - BUG_ON(ret); - btrfs_write_dirty_block_groups(trans, root); - } - return 0; -} - -static int commit_tree_roots(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) -{ - struct btrfs_root *root; - struct list_head *next; - struct extent_buffer *eb; - int ret; - - if (fs_info->readonly) - return 0; - - eb = fs_info->tree_root->node; - extent_buffer_get(eb); - ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb); - free_extent_buffer(eb); - if (ret) - return ret; - - while(!list_empty(&fs_info->dirty_cowonly_roots)) { - next = fs_info->dirty_cowonly_roots.next; - list_del_init(next); - root = list_entry(next, struct btrfs_root, dirty_list); - update_cowonly_root(trans, root); - free_extent_buffer(root->commit_root); - root->commit_root = NULL; - } - - return 0; -} - -static int __commit_transaction(struct btrfs_trans_handle *trans, - struct btrfs_root *root) -{ - u64 start; - u64 end; - struct extent_buffer *eb; - struct extent_io_tree *tree = &root->fs_info->extent_cache; - int ret; - - while(1) { - ret = find_first_extent_bit(tree, 0, &start, &end, - EXTENT_DIRTY); - if (ret) - break; - while(start <= end) { - eb = find_first_extent_buffer(tree, start); - BUG_ON(!eb || eb->start != start); - ret = write_tree_block(trans, root, eb); - BUG_ON(ret); - start += eb->len; - clear_extent_buffer_dirty(eb); - free_extent_buffer(eb); - } - } - return 0; -} - -int btrfs_commit_transaction(struct btrfs_trans_handle *trans, - struct btrfs_root *root) -{ - u64 transid = trans->transid; - int ret = 0; - struct btrfs_fs_info *fs_info = root->fs_info; - - if (root->commit_root == root->node) - goto commit_tree; - if (root == root->fs_info->tree_root) - goto commit_tree; - - free_extent_buffer(root->commit_root); - root->commit_root = NULL; - - btrfs_set_root_bytenr(&root->root_item, root->node->start); - btrfs_set_root_generation(&root->root_item, trans->transid); - root->root_item.level = btrfs_header_level(root->node); - ret = btrfs_update_root(trans, root->fs_info->tree_root, - &root->root_key, &root->root_item); - BUG_ON(ret); -commit_tree: - ret = commit_tree_roots(trans, fs_info); - BUG_ON(ret); - ret = __commit_transaction(trans, root); - BUG_ON(ret); - write_ctree_super(trans, root); - btrfs_finish_extent_commit(trans, fs_info->extent_root, - &fs_info->pinned_extents); - btrfs_free_transaction(root, trans); - free_extent_buffer(root->commit_root); - root->commit_root = NULL; - fs_info->running_transaction = NULL; - fs_info->last_trans_committed = transid; - return 0; } static int find_and_setup_root(struct btrfs_root *tree_root, @@ -507,21 +486,17 @@ static int find_and_setup_root(struct btrfs_root *tree_root, u64 objectid, struct btrfs_root *root) { int ret; - u32 blocksize; u64 generation; - __setup_root(tree_root->nodesize, tree_root->leafsize, - tree_root->sectorsize, tree_root->stripesize, - root, fs_info, objectid); + btrfs_setup_root(root, fs_info, objectid); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); if (ret) return ret; - blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); generation = btrfs_root_generation(&root->root_item); - root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), - blocksize, generation); + root->node = read_tree_block(fs_info, + btrfs_root_bytenr(&root->root_item), generation); if (!extent_buffer_uptodate(root->node)) return -EIO; @@ -532,7 +507,6 @@ static int find_and_setup_log_root(struct btrfs_root *tree_root, struct btrfs_fs_info *fs_info, struct btrfs_super_block *disk_super) { - u32 blocksize; u64 blocknr = btrfs_super_log_root(disk_super); struct btrfs_root *log_root = malloc(sizeof(struct btrfs_root)); @@ -544,15 +518,10 @@ static int find_and_setup_log_root(struct btrfs_root *tree_root, return 0; } - blocksize = btrfs_level_size(tree_root, - btrfs_super_log_root_level(disk_super)); - - __setup_root(tree_root->nodesize, tree_root->leafsize, - tree_root->sectorsize, tree_root->stripesize, - log_root, fs_info, BTRFS_TREE_LOG_OBJECTID); + btrfs_setup_root(log_root, fs_info, + BTRFS_TREE_LOG_OBJECTID); - log_root->node = read_tree_block(tree_root, blocknr, - blocksize, + log_root->node = read_tree_block(fs_info, blocknr, btrfs_super_generation(disk_super) + 1); fs_info->log_root_tree = log_root; @@ -595,13 +564,11 @@ struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info, struct btrfs_path *path; struct extent_buffer *l; u64 generation; - u32 blocksize; int ret = 0; - root = malloc(sizeof(*root)); + root = calloc(1, sizeof(*root)); if (!root) return ERR_PTR(-ENOMEM); - memset(root, 0, sizeof(*root)); if (location->offset == (u64)-1) { ret = find_and_setup_root(tree_root, fs_info, location->objectid, root); @@ -612,12 +579,15 @@ struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info, goto insert; } - __setup_root(tree_root->nodesize, tree_root->leafsize, - tree_root->sectorsize, tree_root->stripesize, - root, fs_info, location->objectid); + btrfs_setup_root(root, fs_info, + location->objectid); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) { + free(root); + return ERR_PTR(-ENOMEM); + } + ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); if (ret != 0) { if (ret > 0) @@ -637,9 +607,8 @@ out: return ERR_PTR(ret); } generation = btrfs_root_generation(&root->root_item); - blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); - root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), - blocksize, generation); + root->node = read_tree_block(fs_info, + btrfs_root_bytenr(&root->root_item), generation); if (!extent_buffer_uptodate(root->node)) { free(root); return ERR_PTR(-EIO); @@ -692,7 +661,8 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) return fs_info->csum_root; if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID) - return fs_info->quota_root; + return fs_info->quota_enabled ? fs_info->quota_root : + ERR_PTR(-ENOENT); BUG_ON(location->objectid == BTRFS_TREE_RELOC_OBJECTID || location->offset != (u64)-1); @@ -714,12 +684,15 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, void btrfs_free_fs_info(struct btrfs_fs_info *fs_info) { + if (fs_info->quota_root) + free(fs_info->quota_root); + free(fs_info->tree_root); free(fs_info->extent_root); free(fs_info->chunk_root); free(fs_info->dev_root); free(fs_info->csum_root); - free(fs_info->quota_root); + free(fs_info->free_space_root); free(fs_info->super_copy); free(fs_info->log_root_tree); free(fs_info); @@ -729,34 +702,25 @@ struct btrfs_fs_info *btrfs_new_fs_info(int writable, u64 sb_bytenr) { struct btrfs_fs_info *fs_info; - fs_info = malloc(sizeof(struct btrfs_fs_info)); + fs_info = calloc(1, sizeof(struct btrfs_fs_info)); if (!fs_info) return NULL; - memset(fs_info, 0, sizeof(struct btrfs_fs_info)); - - fs_info->tree_root = malloc(sizeof(struct btrfs_root)); - fs_info->extent_root = malloc(sizeof(struct btrfs_root)); - fs_info->chunk_root = malloc(sizeof(struct btrfs_root)); - fs_info->dev_root = malloc(sizeof(struct btrfs_root)); - fs_info->csum_root = malloc(sizeof(struct btrfs_root)); - fs_info->quota_root = malloc(sizeof(struct btrfs_root)); - fs_info->super_copy = malloc(BTRFS_SUPER_INFO_SIZE); + fs_info->tree_root = calloc(1, sizeof(struct btrfs_root)); + fs_info->extent_root = calloc(1, sizeof(struct btrfs_root)); + fs_info->chunk_root = calloc(1, sizeof(struct btrfs_root)); + fs_info->dev_root = calloc(1, sizeof(struct btrfs_root)); + fs_info->csum_root = calloc(1, sizeof(struct btrfs_root)); + fs_info->quota_root = calloc(1, sizeof(struct btrfs_root)); + fs_info->free_space_root = calloc(1, sizeof(struct btrfs_root)); + fs_info->super_copy = calloc(1, BTRFS_SUPER_INFO_SIZE); if (!fs_info->tree_root || !fs_info->extent_root || !fs_info->chunk_root || !fs_info->dev_root || !fs_info->csum_root || !fs_info->quota_root || - !fs_info->super_copy) + !fs_info->free_space_root || !fs_info->super_copy) goto free_all; - memset(fs_info->super_copy, 0, BTRFS_SUPER_INFO_SIZE); - memset(fs_info->tree_root, 0, sizeof(struct btrfs_root)); - memset(fs_info->extent_root, 0, sizeof(struct btrfs_root)); - memset(fs_info->chunk_root, 0, sizeof(struct btrfs_root)); - memset(fs_info->dev_root, 0, sizeof(struct btrfs_root)); - memset(fs_info->csum_root, 0, sizeof(struct btrfs_root)); - memset(fs_info->quota_root, 0, sizeof(struct btrfs_root)); - extent_io_tree_init(&fs_info->extent_cache); extent_io_tree_init(&fs_info->free_space_cache); extent_io_tree_init(&fs_info->block_group_cache); @@ -786,7 +750,8 @@ free_all: return NULL; } -int btrfs_check_fs_compatibility(struct btrfs_super_block *sb, int writable) +int btrfs_check_fs_compatibility(struct btrfs_super_block *sb, + unsigned int flags) { u64 features; @@ -805,13 +770,22 @@ int btrfs_check_fs_compatibility(struct btrfs_super_block *sb, int writable) btrfs_set_super_incompat_flags(sb, features); } - features = btrfs_super_compat_ro_flags(sb) & - ~BTRFS_FEATURE_COMPAT_RO_SUPP; - if (writable && features) { - printk("couldn't open RDWR because of unsupported " - "option features (%Lx).\n", - (unsigned long long)features); - return -ENOTSUP; + features = btrfs_super_compat_ro_flags(sb); + if (flags & OPEN_CTREE_WRITES) { + if (flags & OPEN_CTREE_INVALIDATE_FST) { + /* Clear the FREE_SPACE_TREE_VALID bit on disk... */ + features &= ~BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID; + btrfs_set_super_compat_ro_flags(sb, features); + /* ... and ignore the free space tree bit. */ + features &= ~BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE; + } + if (features & ~BTRFS_FEATURE_COMPAT_RO_SUPP) { + printk("couldn't open RDWR because of unsupported " + "option features (%Lx).\n", + (unsigned long long)features); + return -ENOTSUP; + } + } return 0; } @@ -836,13 +810,11 @@ static int find_best_backup_root(struct btrfs_super_block *super) } static int setup_root_or_create_block(struct btrfs_fs_info *fs_info, - enum btrfs_open_ctree_flags flags, + unsigned flags, struct btrfs_root *info_root, u64 objectid, char *str) { - struct btrfs_super_block *sb = fs_info->super_copy; struct btrfs_root *root = fs_info->tree_root; - u32 leafsize = btrfs_super_leafsize(sb); int ret; ret = find_and_setup_root(root, fs_info, objectid, info_root); @@ -855,38 +827,26 @@ static int setup_root_or_create_block(struct btrfs_fs_info *fs_info, * million of places that assume a root has a valid ->node */ info_root->node = - btrfs_find_create_tree_block(info_root, 0, leafsize); + btrfs_find_create_tree_block(fs_info, 0); if (!info_root->node) return -ENOMEM; - clear_extent_buffer_uptodate(NULL, info_root->node); + clear_extent_buffer_uptodate(info_root->node); } return 0; } int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info, u64 root_tree_bytenr, - enum btrfs_open_ctree_flags flags) + unsigned flags) { struct btrfs_super_block *sb = fs_info->super_copy; struct btrfs_root *root; struct btrfs_key key; - u32 sectorsize; - u32 nodesize; - u32 leafsize; - u32 stripesize; u64 generation; - u32 blocksize; int ret; - nodesize = btrfs_super_nodesize(sb); - leafsize = btrfs_super_leafsize(sb); - sectorsize = btrfs_super_sectorsize(sb); - stripesize = btrfs_super_stripesize(sb); - root = fs_info->tree_root; - __setup_root(nodesize, leafsize, sectorsize, stripesize, - root, fs_info, BTRFS_ROOT_TREE_OBJECTID); - blocksize = btrfs_level_size(root, btrfs_super_root_level(sb)); + btrfs_setup_root(root, fs_info, BTRFS_ROOT_TREE_OBJECTID); generation = btrfs_super_generation(sb); if (!root_tree_bytenr && !(flags & OPEN_CTREE_BACKUP_ROOT)) { @@ -903,8 +863,7 @@ int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info, u64 root_tree_bytenr, generation = btrfs_backup_tree_root_gen(backup); } - root->node = read_tree_block(root, root_tree_bytenr, blocksize, - generation); + root->node = read_tree_block(fs_info, root_tree_bytenr, generation); if (!extent_buffer_uptodate(root->node)) { fprintf(stderr, "Couldn't read tree root\n"); return -EIO; @@ -932,8 +891,22 @@ int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info, u64 root_tree_bytenr, ret = find_and_setup_root(root, fs_info, BTRFS_QUOTA_TREE_OBJECTID, fs_info->quota_root); - if (ret == 0) + if (ret) { + free(fs_info->quota_root); + fs_info->quota_root = NULL; + } else { fs_info->quota_enabled = 1; + } + + if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { + ret = find_and_setup_root(root, fs_info, BTRFS_FREE_SPACE_TREE_OBJECTID, + fs_info->free_space_root); + if (ret) { + printk("Couldn't read free space tree\n"); + return -EIO; + } + fs_info->free_space_root->track_dirty = 1; + } ret = find_and_setup_log_root(root, fs_info, sb); if (ret) { @@ -945,8 +918,16 @@ int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info, u64 root_tree_bytenr, fs_info->generation = generation; fs_info->last_trans_committed = generation; if (extent_buffer_uptodate(fs_info->extent_root->node) && - !(flags & OPEN_CTREE_NO_BLOCK_GROUPS)) - btrfs_read_block_groups(fs_info->tree_root); + !(flags & OPEN_CTREE_NO_BLOCK_GROUPS)) { + ret = btrfs_read_block_groups(fs_info->tree_root); + /* + * If we don't find any blockgroups (ENOENT) we're either + * restoring or creating the filesystem, where it's expected, + * anything else is error + */ + if (ret != -ENOENT) + return -EIO; + } key.objectid = BTRFS_FS_TREE_OBJECTID; key.type = BTRFS_ROOT_ITEM_KEY; @@ -960,6 +941,8 @@ int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info, u64 root_tree_bytenr, void btrfs_release_all_roots(struct btrfs_fs_info *fs_info) { + if (fs_info->free_space_root) + free_extent_buffer(fs_info->free_space_root->node); if (fs_info->quota_root) free_extent_buffer(fs_info->quota_root->node); if (fs_info->csum_root) @@ -1006,7 +989,7 @@ void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info) int btrfs_scan_fs_devices(int fd, const char *path, struct btrfs_fs_devices **fs_devices, - u64 sb_bytenr, int super_recover, + u64 sb_bytenr, unsigned sbflags, int skip_devices) { u64 total_devs; @@ -1023,62 +1006,71 @@ int btrfs_scan_fs_devices(int fd, const char *path, dev_size = seek_ret; lseek(fd, 0, SEEK_SET); if (sb_bytenr > dev_size) { - fprintf(stderr, "Superblock bytenr is larger than device size\n"); + error("superblock bytenr %llu is larger than device size %llu", + (unsigned long long)sb_bytenr, + (unsigned long long)dev_size); return -EINVAL; } ret = btrfs_scan_one_device(fd, path, fs_devices, - &total_devs, sb_bytenr, super_recover); + &total_devs, sb_bytenr, sbflags); if (ret) { fprintf(stderr, "No valid Btrfs found on %s\n", path); return ret; } if (!skip_devices && total_devs != 1) { - ret = btrfs_scan_lblkid(); + ret = btrfs_scan_devices(); if (ret) return ret; } return 0; } -int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info) +int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info, + u64 chunk_root_bytenr) { struct btrfs_super_block *sb = fs_info->super_copy; - u32 sectorsize; - u32 nodesize; - u32 leafsize; - u32 blocksize; - u32 stripesize; u64 generation; int ret; - nodesize = btrfs_super_nodesize(sb); - leafsize = btrfs_super_leafsize(sb); - sectorsize = btrfs_super_sectorsize(sb); - stripesize = btrfs_super_stripesize(sb); + btrfs_setup_root(fs_info->chunk_root, fs_info, + BTRFS_CHUNK_TREE_OBJECTID); - __setup_root(nodesize, leafsize, sectorsize, stripesize, - fs_info->chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID); - - ret = btrfs_read_sys_array(fs_info->chunk_root); + ret = btrfs_read_sys_array(fs_info); if (ret) return ret; - blocksize = btrfs_level_size(fs_info->chunk_root, - btrfs_super_chunk_root_level(sb)); generation = btrfs_super_chunk_root_generation(sb); - fs_info->chunk_root->node = read_tree_block(fs_info->chunk_root, - btrfs_super_chunk_root(sb), - blocksize, generation); + if (chunk_root_bytenr && !IS_ALIGNED(chunk_root_bytenr, + fs_info->sectorsize)) { + warning("chunk_root_bytenr %llu is unaligned to %u, ignore it", + chunk_root_bytenr, fs_info->sectorsize); + chunk_root_bytenr = 0; + } + + if (!chunk_root_bytenr) + chunk_root_bytenr = btrfs_super_chunk_root(sb); + else + generation = 0; + + fs_info->chunk_root->node = read_tree_block(fs_info, + chunk_root_bytenr, + generation); if (!extent_buffer_uptodate(fs_info->chunk_root->node)) { - fprintf(stderr, "Couldn't read chunk root\n"); - return -EIO; + if (fs_info->ignore_chunk_tree_error) { + warning("cannot read chunk root, continue anyway"); + fs_info->chunk_root = NULL; + return 0; + } else { + error("cannot read chunk root"); + return -EIO; + } } if (!(btrfs_super_flags(sb) & BTRFS_SUPER_FLAG_METADUMP)) { - ret = btrfs_read_chunk_tree(fs_info->chunk_root); + ret = btrfs_read_chunk_tree(fs_info); if (ret) { fprintf(stderr, "Couldn't read chunk tree\n"); return ret; @@ -1090,7 +1082,8 @@ int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info) static struct btrfs_fs_info *__open_ctree_fd(int fp, const char *path, u64 sb_bytenr, u64 root_tree_bytenr, - enum btrfs_open_ctree_flags flags) + u64 chunk_root_bytenr, + unsigned flags) { struct btrfs_fs_info *fs_info; struct btrfs_super_block *disk_super; @@ -1098,6 +1091,7 @@ static struct btrfs_fs_info *__open_ctree_fd(int fp, const char *path, struct extent_buffer *eb; int ret; int oflags; + unsigned sbflags = SBREAD_DEFAULT; if (sb_bytenr == 0) sb_bytenr = BTRFS_SUPER_INFO_OFFSET; @@ -1113,10 +1107,25 @@ static struct btrfs_fs_info *__open_ctree_fd(int fp, const char *path, } if (flags & OPEN_CTREE_RESTORE) fs_info->on_restoring = 1; + if (flags & OPEN_CTREE_SUPPRESS_CHECK_BLOCK_ERRORS) + fs_info->suppress_check_block_errors = 1; + if (flags & OPEN_CTREE_IGNORE_FSID_MISMATCH) + fs_info->ignore_fsid_mismatch = 1; + if (flags & OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR) + fs_info->ignore_chunk_tree_error = 1; + + if ((flags & OPEN_CTREE_RECOVER_SUPER) + && (flags & OPEN_CTREE_FS_PARTIAL)) { + fprintf(stderr, + "cannot open a partially created filesystem for recovery"); + goto out; + } + + if (flags & OPEN_CTREE_FS_PARTIAL) + sbflags = SBREAD_PARTIAL; - ret = btrfs_scan_fs_devices(fp, path, &fs_devices, sb_bytenr, - (flags & OPEN_CTREE_RECOVER_SUPER), - (flags & OPEN_CTREE_NO_DEVICES)); + ret = btrfs_scan_fs_devices(fp, path, &fs_devices, sb_bytenr, sbflags, + (flags & OPEN_CTREE_NO_DEVICES)); if (ret) goto out; @@ -1134,34 +1143,48 @@ static struct btrfs_fs_info *__open_ctree_fd(int fp, const char *path, goto out; disk_super = fs_info->super_copy; - if (!(flags & OPEN_CTREE_RECOVER_SUPER)) - ret = btrfs_read_dev_super(fs_devices->latest_bdev, - disk_super, sb_bytenr, 1); + if (flags & OPEN_CTREE_RECOVER_SUPER) + ret = btrfs_read_dev_super(fs_devices->latest_bdev, disk_super, + sb_bytenr, SBREAD_RECOVER); else - ret = btrfs_read_dev_super(fp, disk_super, sb_bytenr, 0); + ret = btrfs_read_dev_super(fp, disk_super, sb_bytenr, + sbflags); if (ret) { printk("No valid btrfs found\n"); goto out_devices; } + if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_CHANGING_FSID && + !fs_info->ignore_fsid_mismatch) { + fprintf(stderr, "ERROR: Filesystem UUID change in progress\n"); + goto out_devices; + } + memcpy(fs_info->fsid, &disk_super->fsid, BTRFS_FSID_SIZE); + fs_info->sectorsize = btrfs_super_sectorsize(disk_super); + fs_info->nodesize = btrfs_super_nodesize(disk_super); + fs_info->stripesize = btrfs_super_stripesize(disk_super); - ret = btrfs_check_fs_compatibility(fs_info->super_copy, - flags & OPEN_CTREE_WRITES); + ret = btrfs_check_fs_compatibility(fs_info->super_copy, flags); if (ret) goto out_devices; - ret = btrfs_setup_chunk_tree_and_device_map(fs_info); + ret = btrfs_setup_chunk_tree_and_device_map(fs_info, chunk_root_bytenr); if (ret) goto out_chunk; + /* Chunk tree root is unable to read, return directly */ + if (!fs_info->chunk_root) + return fs_info; + eb = fs_info->chunk_root->node; read_extent_buffer(eb, fs_info->chunk_tree_uuid, btrfs_header_chunk_tree_uuid(eb), BTRFS_UUID_SIZE); ret = btrfs_setup_all_roots(fs_info, root_tree_bytenr, flags); - if (ret) + if (ret && !(flags & __OPEN_CTREE_RETURN_CHUNK_ROOT) && + !fs_info->ignore_chunk_tree_error) goto out_chunk; return fs_info; @@ -1178,69 +1201,271 @@ out: struct btrfs_fs_info *open_ctree_fs_info(const char *filename, u64 sb_bytenr, u64 root_tree_bytenr, - enum btrfs_open_ctree_flags flags) + u64 chunk_root_bytenr, + unsigned flags) { int fp; + int ret; struct btrfs_fs_info *info; - int oflags = O_CREAT | O_RDWR; + int oflags = O_RDWR; + struct stat st; + + ret = stat(filename, &st); + if (ret < 0) { + error("cannot stat '%s': %m", filename); + return NULL; + } + if (!(((st.st_mode & S_IFMT) == S_IFREG) || ((st.st_mode & S_IFMT) == S_IFBLK))) { + error("not a regular file or block device: %s", filename); + return NULL; + } if (!(flags & OPEN_CTREE_WRITES)) oflags = O_RDONLY; - fp = open(filename, oflags, 0600); + fp = open(filename, oflags); if (fp < 0) { - fprintf (stderr, "Could not open %s\n", filename); + error("cannot open '%s': %m", filename); return NULL; } info = __open_ctree_fd(fp, filename, sb_bytenr, root_tree_bytenr, - flags); + chunk_root_bytenr, flags); close(fp); return info; } struct btrfs_root *open_ctree(const char *filename, u64 sb_bytenr, - enum btrfs_open_ctree_flags flags) + unsigned flags) { struct btrfs_fs_info *info; - info = open_ctree_fs_info(filename, sb_bytenr, 0, flags); + /* This flags may not return fs_info with any valid root */ + BUG_ON(flags & OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR); + info = open_ctree_fs_info(filename, sb_bytenr, 0, 0, flags); if (!info) return NULL; + if (flags & __OPEN_CTREE_RETURN_CHUNK_ROOT) + return info->chunk_root; return info->fs_root; } struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr, - enum btrfs_open_ctree_flags flags) + unsigned flags) { struct btrfs_fs_info *info; - info = __open_ctree_fd(fp, path, sb_bytenr, 0, flags); + + /* This flags may not return fs_info with any valid root */ + if (flags & OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR) { + error("invalid open_ctree flags: 0x%llx", + (unsigned long long)flags); + return NULL; + } + info = __open_ctree_fd(fp, path, sb_bytenr, 0, 0, flags); if (!info) return NULL; + if (flags & __OPEN_CTREE_RETURN_CHUNK_ROOT) + return info->chunk_root; return info->fs_root; } +/* + * Check if the super is valid: + * - nodesize/sectorsize - minimum, maximum, alignment + * - tree block starts - alignment + * - number of devices - something sane + * - sys array size - maximum + */ +static int check_super(struct btrfs_super_block *sb, unsigned sbflags) +{ + u8 result[BTRFS_CSUM_SIZE]; + u32 crc; + u16 csum_type; + int csum_size; + + if (btrfs_super_magic(sb) != BTRFS_MAGIC) { + if (btrfs_super_magic(sb) == BTRFS_MAGIC_PARTIAL) { + if (!(sbflags & SBREAD_PARTIAL)) { + error("superblock magic doesn't match"); + return -EIO; + } + } + } + + csum_type = btrfs_super_csum_type(sb); + if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) { + error("unsupported checksum algorithm %u", csum_type); + return -EIO; + } + csum_size = btrfs_csum_sizes[csum_type]; + + crc = ~(u32)0; + crc = btrfs_csum_data((char *)sb + BTRFS_CSUM_SIZE, crc, + BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); + btrfs_csum_final(crc, result); + + if (memcmp(result, sb->csum, csum_size)) { + error("superblock checksum mismatch"); + return -EIO; + } + if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) { + error("tree_root level too big: %d >= %d", + btrfs_super_root_level(sb), BTRFS_MAX_LEVEL); + goto error_out; + } + if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) { + error("chunk_root level too big: %d >= %d", + btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL); + goto error_out; + } + if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) { + error("log_root level too big: %d >= %d", + btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL); + goto error_out; + } + + if (!IS_ALIGNED(btrfs_super_root(sb), 4096)) { + error("tree_root block unaligned: %llu", btrfs_super_root(sb)); + goto error_out; + } + if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096)) { + error("chunk_root block unaligned: %llu", + btrfs_super_chunk_root(sb)); + goto error_out; + } + if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096)) { + error("log_root block unaligned: %llu", + btrfs_super_log_root(sb)); + goto error_out; + } + if (btrfs_super_nodesize(sb) < 4096) { + error("nodesize too small: %u < 4096", + btrfs_super_nodesize(sb)); + goto error_out; + } + if (!IS_ALIGNED(btrfs_super_nodesize(sb), 4096)) { + error("nodesize unaligned: %u", btrfs_super_nodesize(sb)); + goto error_out; + } + if (btrfs_super_sectorsize(sb) < 4096) { + error("sectorsize too small: %u < 4096", + btrfs_super_sectorsize(sb)); + goto error_out; + } + if (!IS_ALIGNED(btrfs_super_sectorsize(sb), 4096)) { + error("sectorsize unaligned: %u", btrfs_super_sectorsize(sb)); + goto error_out; + } + if (btrfs_super_total_bytes(sb) == 0) { + error("invalid total_bytes 0"); + goto error_out; + } + if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) { + error("invalid bytes_used %llu", btrfs_super_bytes_used(sb)); + goto error_out; + } + if ((btrfs_super_stripesize(sb) != 4096) + && (btrfs_super_stripesize(sb) != btrfs_super_sectorsize(sb))) { + error("invalid stripesize %u", btrfs_super_stripesize(sb)); + goto error_out; + } + + if (memcmp(sb->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) { + char fsid[BTRFS_UUID_UNPARSED_SIZE]; + char dev_fsid[BTRFS_UUID_UNPARSED_SIZE]; + + uuid_unparse(sb->fsid, fsid); + uuid_unparse(sb->dev_item.fsid, dev_fsid); + error("dev_item UUID does not match fsid: %s != %s", + dev_fsid, fsid); + goto error_out; + } + + /* + * Hint to catch really bogus numbers, bitflips or so + */ + if (btrfs_super_num_devices(sb) > (1UL << 31)) { + warning("suspicious number of devices: %llu", + btrfs_super_num_devices(sb)); + } + + if (btrfs_super_num_devices(sb) == 0) { + error("number of devices is 0"); + goto error_out; + } + + /* + * Obvious sys_chunk_array corruptions, it must hold at least one key + * and one chunk + */ + if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) { + error("system chunk array too big %u > %u", + btrfs_super_sys_array_size(sb), + BTRFS_SYSTEM_CHUNK_ARRAY_SIZE); + goto error_out; + } + if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key) + + sizeof(struct btrfs_chunk)) { + error("system chunk array too small %u < %zu", + btrfs_super_sys_array_size(sb), + sizeof(struct btrfs_disk_key) + + sizeof(struct btrfs_chunk)); + goto error_out; + } + + return 0; + +error_out: + error("superblock checksum matches but it has invalid members"); + return -EIO; +} + +/* + * btrfs_read_dev_super - read a valid superblock from a block device + * @fd: file descriptor of the device + * @sb: buffer where the superblock is going to be read in + * @sb_bytenr: offset of the particular superblock copy we want + * @sbflags: flags controlling how the superblock is read + * + * This function is used by various btrfs comands to obtain a valid superblock. + * + * It's mode of operation is controlled by the @sb_bytenr and @sbdflags + * parameters. If SBREAD_RECOVER flag is set and @sb_bytenr is + * BTRFS_SUPER_INFO_OFFSET then the function reads all 3 superblock copies and + * returns the newest one. If SBREAD_RECOVER is not set then only a single + * copy is read, which one is decided by @sb_bytenr. If @sb_bytenr != + * BTRFS_SUPER_INFO_OFFSET then the @sbflags is effectively ignored and only a + * single copy is read. + */ int btrfs_read_dev_super(int fd, struct btrfs_super_block *sb, u64 sb_bytenr, - int super_recover) + unsigned sbflags) { u8 fsid[BTRFS_FSID_SIZE]; int fsid_is_initialized = 0; - struct btrfs_super_block buf; + char tmp[BTRFS_SUPER_INFO_SIZE]; + struct btrfs_super_block *buf = (struct btrfs_super_block *)tmp; int i; int ret; - int max_super = super_recover ? BTRFS_SUPER_MIRROR_MAX : 1; + int max_super = sbflags & SBREAD_RECOVER ? BTRFS_SUPER_MIRROR_MAX : 1; u64 transid = 0; u64 bytenr; if (sb_bytenr != BTRFS_SUPER_INFO_OFFSET) { - ret = pread64(fd, &buf, sizeof(buf), sb_bytenr); - if (ret < sizeof(buf)) - return -1; + ret = pread64(fd, buf, BTRFS_SUPER_INFO_SIZE, sb_bytenr); + /* real error */ + if (ret < 0) + return -errno; - if (btrfs_super_bytenr(&buf) != sb_bytenr || - btrfs_super_magic(&buf) != BTRFS_MAGIC) - return -1; + /* Not large enough sb, return -ENOENT instead of normal -EIO */ + if (ret < BTRFS_SUPER_INFO_SIZE) + return -ENOENT; - memcpy(sb, &buf, sizeof(*sb)); + if (btrfs_super_bytenr(buf) != sb_bytenr) + return -EIO; + + ret = check_super(buf, sbflags); + if (ret < 0) + return ret; + memcpy(sb, buf, BTRFS_SUPER_INFO_SIZE); return 0; } @@ -1253,22 +1478,22 @@ int btrfs_read_dev_super(int fd, struct btrfs_super_block *sb, u64 sb_bytenr, for (i = 0; i < max_super; i++) { bytenr = btrfs_sb_offset(i); - ret = pread64(fd, &buf, sizeof(buf), bytenr); - if (ret < sizeof(buf)) + ret = pread64(fd, buf, BTRFS_SUPER_INFO_SIZE, bytenr); + if (ret < BTRFS_SUPER_INFO_SIZE) break; - if (btrfs_super_bytenr(&buf) != bytenr ) + if (btrfs_super_bytenr(buf) != bytenr ) continue; /* if magic is NULL, the device was removed */ - if (btrfs_super_magic(&buf) == 0 && i == 0) - return -1; - if (btrfs_super_magic(&buf) != BTRFS_MAGIC) + if (btrfs_super_magic(buf) == 0 && i == 0) + break; + if (check_super(buf, sbflags)) continue; if (!fsid_is_initialized) { - memcpy(fsid, buf.fsid, sizeof(fsid)); + memcpy(fsid, buf->fsid, sizeof(fsid)); fsid_is_initialized = 1; - } else if (memcmp(fsid, buf.fsid, sizeof(fsid))) { + } else if (memcmp(fsid, buf->fsid, sizeof(fsid))) { /* * the superblocks (the original one and * its backups) contain data of different @@ -1277,16 +1502,16 @@ int btrfs_read_dev_super(int fd, struct btrfs_super_block *sb, u64 sb_bytenr, continue; } - if (btrfs_super_generation(&buf) > transid) { - memcpy(sb, &buf, sizeof(*sb)); - transid = btrfs_super_generation(&buf); + if (btrfs_super_generation(buf) > transid) { + memcpy(sb, buf, BTRFS_SUPER_INFO_SIZE); + transid = btrfs_super_generation(buf); } } return transid > 0 ? 0 : -1; } -static int write_dev_supers(struct btrfs_root *root, +static int write_dev_supers(struct btrfs_fs_info *fs_info, struct btrfs_super_block *sb, struct btrfs_device *device) { @@ -1294,21 +1519,22 @@ static int write_dev_supers(struct btrfs_root *root, u32 crc; int i, ret; - if (root->fs_info->super_bytenr != BTRFS_SUPER_INFO_OFFSET) { - btrfs_set_super_bytenr(sb, root->fs_info->super_bytenr); + if (fs_info->super_bytenr != BTRFS_SUPER_INFO_OFFSET) { + btrfs_set_super_bytenr(sb, fs_info->super_bytenr); crc = ~(u32)0; - crc = btrfs_csum_data(NULL, (char *)sb + BTRFS_CSUM_SIZE, crc, + crc = btrfs_csum_data((char *)sb + BTRFS_CSUM_SIZE, crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); - btrfs_csum_final(crc, (char *)&sb->csum[0]); + btrfs_csum_final(crc, &sb->csum[0]); /* * super_copy is BTRFS_SUPER_INFO_SIZE bytes and is * zero filled, we can use it directly */ - ret = pwrite64(device->fd, root->fs_info->super_copy, + ret = pwrite64(device->fd, fs_info->super_copy, BTRFS_SUPER_INFO_SIZE, - root->fs_info->super_bytenr); - BUG_ON(ret != BTRFS_SUPER_INFO_SIZE); + fs_info->super_bytenr); + if (ret != BTRFS_SUPER_INFO_SIZE) + goto write_err; return 0; } @@ -1320,36 +1546,42 @@ static int write_dev_supers(struct btrfs_root *root, btrfs_set_super_bytenr(sb, bytenr); crc = ~(u32)0; - crc = btrfs_csum_data(NULL, (char *)sb + BTRFS_CSUM_SIZE, crc, + crc = btrfs_csum_data((char *)sb + BTRFS_CSUM_SIZE, crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); - btrfs_csum_final(crc, (char *)&sb->csum[0]); + btrfs_csum_final(crc, &sb->csum[0]); /* * super_copy is BTRFS_SUPER_INFO_SIZE bytes and is * zero filled, we can use it directly */ - ret = pwrite64(device->fd, root->fs_info->super_copy, + ret = pwrite64(device->fd, fs_info->super_copy, BTRFS_SUPER_INFO_SIZE, bytenr); - BUG_ON(ret != BTRFS_SUPER_INFO_SIZE); + if (ret != BTRFS_SUPER_INFO_SIZE) + goto write_err; } return 0; + +write_err: + if (ret > 0) + fprintf(stderr, "WARNING: failed to write all sb data\n"); + else + fprintf(stderr, "WARNING: failed to write sb: %m\n"); + return ret; } -int write_all_supers(struct btrfs_root *root) +int write_all_supers(struct btrfs_fs_info *fs_info) { - struct list_head *cur; - struct list_head *head = &root->fs_info->fs_devices->devices; + struct list_head *head = &fs_info->fs_devices->devices; struct btrfs_device *dev; struct btrfs_super_block *sb; struct btrfs_dev_item *dev_item; int ret; u64 flags; - sb = root->fs_info->super_copy; + sb = fs_info->super_copy; dev_item = &sb->dev_item; - list_for_each(cur, head) { - dev = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry(dev, head, dev_list) { if (!dev->writeable) continue; @@ -1367,68 +1599,88 @@ int write_all_supers(struct btrfs_root *root) flags = btrfs_super_flags(sb); btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN); - ret = write_dev_supers(root, sb, dev); + ret = write_dev_supers(fs_info, sb, dev); BUG_ON(ret); } return 0; } int write_ctree_super(struct btrfs_trans_handle *trans, - struct btrfs_root *root) + struct btrfs_fs_info *fs_info) { int ret; - struct btrfs_root *tree_root = root->fs_info->tree_root; - struct btrfs_root *chunk_root = root->fs_info->chunk_root; + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *chunk_root = fs_info->chunk_root; - if (root->fs_info->readonly) + if (fs_info->readonly) return 0; - btrfs_set_super_generation(root->fs_info->super_copy, + btrfs_set_super_generation(fs_info->super_copy, trans->transid); - btrfs_set_super_root(root->fs_info->super_copy, + btrfs_set_super_root(fs_info->super_copy, tree_root->node->start); - btrfs_set_super_root_level(root->fs_info->super_copy, + btrfs_set_super_root_level(fs_info->super_copy, btrfs_header_level(tree_root->node)); - btrfs_set_super_chunk_root(root->fs_info->super_copy, + btrfs_set_super_chunk_root(fs_info->super_copy, chunk_root->node->start); - btrfs_set_super_chunk_root_level(root->fs_info->super_copy, + btrfs_set_super_chunk_root_level(fs_info->super_copy, btrfs_header_level(chunk_root->node)); - btrfs_set_super_chunk_root_generation(root->fs_info->super_copy, + btrfs_set_super_chunk_root_generation(fs_info->super_copy, btrfs_header_generation(chunk_root->node)); - ret = write_all_supers(root); + ret = write_all_supers(fs_info); if (ret) fprintf(stderr, "failed to write new super block err %d\n", ret); return ret; } -int close_ctree(struct btrfs_root *root) +int close_ctree_fs_info(struct btrfs_fs_info *fs_info) { int ret; + int err = 0; struct btrfs_trans_handle *trans; - struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_root *root = fs_info->tree_root; if (fs_info->last_trans_committed != fs_info->generation) { + BUG_ON(!root); trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto skip_commit; + } btrfs_commit_transaction(trans, root); trans = btrfs_start_transaction(root, 1); + BUG_ON(IS_ERR(trans)); ret = commit_tree_roots(trans, fs_info); BUG_ON(ret); ret = __commit_transaction(trans, root); BUG_ON(ret); - write_ctree_super(trans, root); - btrfs_free_transaction(root, trans); + write_ctree_super(trans, fs_info); + kfree(trans); + } + + if (fs_info->finalize_on_close) { + btrfs_set_super_magic(fs_info->super_copy, BTRFS_MAGIC); + root->fs_info->finalize_on_close = 0; + ret = write_all_supers(fs_info); + if (ret) + fprintf(stderr, + "failed to write new super block err %d\n", ret); } + +skip_commit: btrfs_free_block_groups(fs_info); free_fs_roots_tree(&fs_info->fs_root_tree); btrfs_release_all_roots(fs_info); - btrfs_close_devices(fs_info->fs_devices); + ret = btrfs_close_devices(fs_info->fs_devices); btrfs_cleanup_all_caches(fs_info); btrfs_free_fs_info(fs_info); - return 0; + if (!err) + err = ret; + return err; } int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -1437,12 +1689,6 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, return clear_extent_buffer_dirty(eb); } -int wait_on_tree_block_writeback(struct btrfs_root *root, - struct extent_buffer *eb) -{ - return 0; -} - void btrfs_mark_buffer_dirty(struct extent_buffer *eb) { set_extent_buffer_dirty(eb);