X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=cmds-check.c;h=89e12c1570a3b241671b6a93c470a4397b610b4e;hb=e33cad4e349f37c851160f265ff57f32b8b46111;hp=fd6bbd26fa22b33d6a3232154d64865f3ca5cddb;hpb=e42f8c6a3cf01737714142ec9b0bdc52a86063cb;p=platform%2Fupstream%2Fbtrfs-progs.git diff --git a/cmds-check.c b/cmds-check.c index fd6bbd2..89e12c1 100644 --- a/cmds-check.c +++ b/cmds-check.c @@ -40,8 +40,9 @@ #include "qgroup-verify.h" #include "rbtree-utils.h" #include "backref.h" -#include "ulist.h" +#include "kernel-shared/ulist.h" #include "hash.h" +#include "help.h" enum task_position { TASK_EXTENTS, @@ -65,7 +66,6 @@ static u64 total_extent_tree_bytes = 0; static u64 btree_space_waste = 0; static u64 data_bytes_allocated = 0; static u64 data_bytes_referenced = 0; -static int found_old_backref = 0; static LIST_HEAD(duplicate_extents); static LIST_HEAD(delete_items); static int no_holes = 0; @@ -85,7 +85,7 @@ enum btrfs_check_mode { static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT; struct extent_backref { - struct list_head list; + struct rb_node node; unsigned int is_data:1; unsigned int found_extent_tree:1; unsigned int full_backref:1; @@ -93,9 +93,9 @@ struct extent_backref { unsigned int broken:1; }; -static inline struct extent_backref* to_extent_backref(struct list_head *entry) +static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node) { - return list_entry(entry, struct extent_backref, list); + return rb_entry(node, struct extent_backref, node); } struct data_backref { @@ -136,6 +136,51 @@ static inline struct data_backref* to_data_backref(struct extent_backref *back) return container_of(back, struct data_backref, node); } +static int compare_data_backref(struct rb_node *node1, struct rb_node *node2) +{ + struct extent_backref *ext1 = rb_node_to_extent_backref(node1); + struct extent_backref *ext2 = rb_node_to_extent_backref(node2); + struct data_backref *back1 = to_data_backref(ext1); + struct data_backref *back2 
= to_data_backref(ext2); + + WARN_ON(!ext1->is_data); + WARN_ON(!ext2->is_data); + + /* parent and root are a union, so this covers both */ + if (back1->parent > back2->parent) + return 1; + if (back1->parent < back2->parent) + return -1; + + /* This is a full backref and the parents match. */ + if (back1->node.full_backref) + return 0; + + if (back1->owner > back2->owner) + return 1; + if (back1->owner < back2->owner) + return -1; + + if (back1->offset > back2->offset) + return 1; + if (back1->offset < back2->offset) + return -1; + + if (back1->found_ref && back2->found_ref) { + if (back1->disk_bytenr > back2->disk_bytenr) + return 1; + if (back1->disk_bytenr < back2->disk_bytenr) + return -1; + + if (back1->bytes > back2->bytes) + return 1; + if (back1->bytes < back2->bytes) + return -1; + } + + return 0; +} + /* * Much like data_backref, just removed the undetermined members * and change it to use list_head. @@ -164,12 +209,54 @@ static inline struct tree_backref* to_tree_backref(struct extent_backref *back) return container_of(back, struct tree_backref, node); } +static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2) +{ + struct extent_backref *ext1 = rb_node_to_extent_backref(node1); + struct extent_backref *ext2 = rb_node_to_extent_backref(node2); + struct tree_backref *back1 = to_tree_backref(ext1); + struct tree_backref *back2 = to_tree_backref(ext2); + + WARN_ON(ext1->is_data); + WARN_ON(ext2->is_data); + + /* parent and root are a union, so this covers both */ + if (back1->parent > back2->parent) + return 1; + if (back1->parent < back2->parent) + return -1; + + return 0; +} + +static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2) +{ + struct extent_backref *ext1 = rb_node_to_extent_backref(node1); + struct extent_backref *ext2 = rb_node_to_extent_backref(node2); + + if (ext1->is_data > ext2->is_data) + return 1; + + if (ext1->is_data < ext2->is_data) + return -1; + + if (ext1->full_backref > 
ext2->full_backref) + return 1; + if (ext1->full_backref < ext2->full_backref) + return -1; + + if (ext1->is_data) + return compare_data_backref(node1, node2); + else + return compare_tree_backref(node1, node2); +} + /* Explicit initialization for extent_record::flag_block_full_backref */ enum { FLAG_UNSET = 2 }; struct extent_record { struct list_head backrefs; struct list_head dups; + struct rb_root backref_tree; struct list_head list; struct cache_extent cache; struct btrfs_disk_key parent_key; @@ -225,7 +312,6 @@ struct root_item_record { u64 last_snapshot; u8 level; u8 drop_level; - int level_size; struct btrfs_key drop_key; }; @@ -828,7 +914,8 @@ static void print_inode_error(struct btrfs_root *root, struct inode_record *rec) } if (!found) fprintf(stderr, "\tstart: 0, len: %llu\n", - round_up(rec->isize, root->sectorsize)); + round_up(rec->isize, + root->fs_info->sectorsize)); } } @@ -1477,8 +1564,7 @@ out: return has_parent ? 0 : 2; } -static int process_dir_item(struct btrfs_root *root, - struct extent_buffer *eb, +static int process_dir_item(struct extent_buffer *eb, int slot, struct btrfs_key *key, struct shared_node *active_node) { @@ -1512,15 +1598,29 @@ static int process_dir_item(struct btrfs_root *root, filetype = btrfs_dir_type(eb, di); rec->found_size += name_len; - if (name_len <= BTRFS_NAME_LEN) { + if (cur + sizeof(*di) + name_len > total || + name_len > BTRFS_NAME_LEN) { + error = REF_ERR_NAME_TOO_LONG; + + if (cur + sizeof(*di) > total) + break; + len = min_t(u32, total - cur - sizeof(*di), + BTRFS_NAME_LEN); + } else { len = name_len; error = 0; - } else { - len = BTRFS_NAME_LEN; - error = REF_ERR_NAME_TOO_LONG; } + read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len); + if (key->type == BTRFS_DIR_ITEM_KEY && + key->offset != btrfs_name_hash(namebuf, len)) { + rec->errors |= I_ERR_ODD_DIR_ITEM; + error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu", + key->objectid, key->offset, 
namebuf, len, filetype, + key->offset, btrfs_name_hash(namebuf, len)); + } + if (location.type == BTRFS_INODE_ITEM_KEY) { add_inode_backref(inode_cache, location.objectid, key->objectid, key->offset, namebuf, @@ -1569,13 +1669,22 @@ static int process_inode_ref(struct extent_buffer *eb, while (cur < total) { name_len = btrfs_inode_ref_name_len(eb, ref); index = btrfs_inode_ref_index(eb, ref); - if (name_len <= BTRFS_NAME_LEN) { + + /* inode_ref + namelen should not cross item boundary */ + if (cur + sizeof(*ref) + name_len > total || + name_len > BTRFS_NAME_LEN) { + if (total < cur + sizeof(*ref)) + break; + + /* Still try to read out the remaining part */ + len = min_t(u32, total - cur - sizeof(*ref), + BTRFS_NAME_LEN); + error = REF_ERR_NAME_TOO_LONG; + } else { len = name_len; error = 0; - } else { - len = BTRFS_NAME_LEN; - error = REF_ERR_NAME_TOO_LONG; } + read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len); add_inode_backref(inode_cache, key->objectid, key->offset, index, namebuf, len, 0, key->type, error); @@ -1684,7 +1793,8 @@ static int count_csum_range(struct btrfs_root *root, u64 start, start = key.offset; size = btrfs_item_size_nr(leaf, path.slots[0]); - csum_end = key.offset + (size / csum_size) * root->sectorsize; + csum_end = key.offset + (size / csum_size) * + root->fs_info->sectorsize; if (csum_end > start) { size = min(csum_end - start, len); len -= size; @@ -1711,7 +1821,7 @@ static int process_file_extent(struct btrfs_root *root, u64 num_bytes = 0; u64 disk_bytenr = 0; u64 extent_offset = 0; - u64 mask = root->sectorsize - 1; + u64 mask = root->fs_info->sectorsize - 1; int extent_type; int ret; @@ -1835,7 +1945,7 @@ static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb, switch (key.type) { case BTRFS_DIR_ITEM_KEY: case BTRFS_DIR_INDEX_KEY: - ret = process_dir_item(root, eb, i, &key, active_node); + ret = process_dir_item(eb, i, &key, active_node); break; case BTRFS_INODE_REF_KEY: ret = process_inode_ref(eb, i, 
&key, active_node); @@ -1868,6 +1978,11 @@ static int update_nodes_refs(struct btrfs_root *root, u64 bytenr, static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path, unsigned int ext_ref); +/* + * Returns >0 Found error, not fatal, should continue + * Returns <0 Fatal error, must exit the whole check + * Returns 0 No errors found + */ static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path, struct node_refs *nrefs, int *level, int ext_ref) { @@ -1937,23 +2052,18 @@ again: } out: err &= ~LAST_ITEM; - /* - * Convert any error bitmap to -EIO, as we should avoid - * mixing positive and negative return value to represent - * error - */ if (err && !ret) - ret = -EIO; + ret = err; return ret; } static void reada_walk_down(struct btrfs_root *root, struct extent_buffer *node, int slot) { + struct btrfs_fs_info *fs_info = root->fs_info; u64 bytenr; u64 ptr_gen; u32 nritems; - u32 blocksize; int i; int level; @@ -1962,11 +2072,10 @@ static void reada_walk_down(struct btrfs_root *root, return; nritems = btrfs_header_nritems(node); - blocksize = root->nodesize; for (i = slot; i < nritems; i++) { bytenr = btrfs_node_blockptr(node, i); ptr_gen = btrfs_node_ptr_generation(node, i); - readahead_tree_block(root, bytenr, blocksize, ptr_gen); + readahead_tree_block(fs_info, bytenr, ptr_gen); } } @@ -1983,8 +2092,7 @@ static void reada_walk_down(struct btrfs_root *root, * which makes leaf owner check not so strong, key check should be * sufficient enough for that case. 
*/ -static int check_child_node(struct btrfs_root *root, - struct extent_buffer *parent, int slot, +static int check_child_node(struct extent_buffer *parent, int slot, struct extent_buffer *child) { struct btrfs_key parent_key; @@ -2088,9 +2196,9 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, enum btrfs_tree_block_status status; u64 bytenr; u64 ptr_gen; + struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *next; struct extent_buffer *cur; - u32 blocksize; int ret, err = 0; u64 refs; @@ -2139,7 +2247,6 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, } bytenr = btrfs_node_blockptr(cur, path->slots[*level]); ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); - blocksize = root->nodesize; if (bytenr == nrefs->bytenr[*level - 1]) { refs = nrefs->refs[*level - 1]; @@ -2163,12 +2270,11 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, } } - next = btrfs_find_tree_block(root, bytenr, blocksize); + next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize); if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { free_extent_buffer(next); reada_walk_down(root, cur, path->slots[*level]); - next = read_tree_block(root, bytenr, blocksize, - ptr_gen); + next = read_tree_block(root->fs_info, bytenr, ptr_gen); if (!extent_buffer_uptodate(next)) { struct btrfs_key node_key; @@ -2178,14 +2284,16 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, btrfs_add_corrupt_extent_record(root->fs_info, &node_key, path->nodes[*level]->start, - root->nodesize, *level); + root->fs_info->nodesize, + *level); err = -EIO; goto out; } } - ret = check_child_node(root, cur, path->slots[*level], next); + ret = check_child_node(cur, path->slots[*level], next); if (ret) { + free_extent_buffer(next); err = ret; goto out; } @@ -2213,15 +2321,20 @@ out: static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path, unsigned int ext_ref); +/* + 
* Returns >0 Found error, should continue + * Returns <0 Fatal error, must exit the whole check + * Returns 0 No errors found + */ static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path, int *level, struct node_refs *nrefs, int ext_ref) { enum btrfs_tree_block_status status; u64 bytenr; u64 ptr_gen; + struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *next; struct extent_buffer *cur; - u32 blocksize; int ret; WARN_ON(*level < 0); @@ -2261,7 +2374,6 @@ static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path, } bytenr = btrfs_node_blockptr(cur, path->slots[*level]); ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); - blocksize = root->nodesize; ret = update_nodes_refs(root, bytenr, nrefs, *level - 1); if (ret) @@ -2271,28 +2383,28 @@ static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path, continue; } - next = btrfs_find_tree_block(root, bytenr, blocksize); + next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize); if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { free_extent_buffer(next); reada_walk_down(root, cur, path->slots[*level]); - next = read_tree_block(root, bytenr, blocksize, - ptr_gen); + next = read_tree_block(fs_info, bytenr, ptr_gen); if (!extent_buffer_uptodate(next)) { struct btrfs_key node_key; btrfs_node_key_to_cpu(path->nodes[*level], &node_key, path->slots[*level]); - btrfs_add_corrupt_extent_record(root->fs_info, + btrfs_add_corrupt_extent_record(fs_info, &node_key, path->nodes[*level]->start, - root->nodesize, *level); + fs_info->nodesize, + *level); ret = -EIO; break; } } - ret = check_child_node(root, cur, path->slots[*level], next); + ret = check_child_node(cur, path->slots[*level], next); if (ret < 0) break; @@ -2534,8 +2646,6 @@ static int add_missing_dir_index(struct btrfs_root *root, } static int delete_dir_index(struct btrfs_root *root, - struct cache_tree *inode_cache, - struct inode_record *rec, struct inode_backref *backref) 
{ struct btrfs_trans_handle *trans; @@ -2577,7 +2687,7 @@ static int delete_dir_index(struct btrfs_root *root, static int create_inode_item(struct btrfs_root *root, struct inode_record *rec, - struct inode_backref *backref, int root_dir) + int root_dir) { struct btrfs_trans_handle *trans; struct btrfs_inode_item inode_item; @@ -2643,7 +2753,7 @@ static int repair_inode_backrefs(struct btrfs_root *root, list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) { if (!delete && rec->ino == root_dirid) { if (!rec->found_inode_item) { - ret = create_inode_item(root, rec, backref, 1); + ret = create_inode_item(root, rec, 1); if (ret) break; repaired++; @@ -2658,12 +2768,13 @@ static int repair_inode_backrefs(struct btrfs_root *root, ((backref->found_dir_index && !backref->found_inode_ref) || (backref->found_dir_index && backref->found_inode_ref && (backref->errors & REF_ERR_INDEX_UNMATCH)))) { - ret = delete_dir_index(root, inode_cache, rec, backref); + ret = delete_dir_index(root, backref); if (ret) break; repaired++; list_del(&backref->list); free(backref); + continue; } if (!delete && !backref->found_dir_index && @@ -2674,12 +2785,12 @@ static int repair_inode_backrefs(struct btrfs_root *root, break; repaired++; if (backref->found_dir_item && - backref->found_dir_index && backref->found_dir_index) { if (!backref->errors && backref->found_inode_ref) { list_del(&backref->list); free(backref); + continue; } } } @@ -2729,7 +2840,7 @@ static int repair_inode_backrefs(struct btrfs_root *root, backref->found_dir_item && !(backref->errors & REF_ERR_INDEX_UNMATCH) && !rec->found_inode_item)) { - ret = create_inode_item(root, rec, backref, 0); + ret = create_inode_item(root, rec, 0); if (ret) break; repaired++; @@ -2853,6 +2964,31 @@ out: return ret; } +static int get_highest_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 *highest_ino) +{ + struct btrfs_key key, found_key; + int ret; + + btrfs_init_path(path); + 
key.objectid = BTRFS_LAST_FREE_OBJECTID; + key.offset = -1; + key.type = BTRFS_INODE_ITEM_KEY; + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret == 1) { + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0] - 1); + *highest_ino = found_key.objectid; + ret = 0; + } + if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID) + ret = -EOVERFLOW; + btrfs_release_path(path); + return ret; +} + static int repair_inode_nlinks(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, @@ -2898,11 +3034,9 @@ static int repair_inode_nlinks(struct btrfs_trans_handle *trans, } if (rec->found_link == 0) { - lost_found_ino = root->highest_inode; - if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) { - ret = -EOVERFLOW; + ret = get_highest_inode(trans, root, path, &lost_found_ino); + if (ret < 0) goto out; - } lost_found_ino++; ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name), BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino, @@ -3190,7 +3324,8 @@ static int repair_inode_discount_extent(struct btrfs_trans_handle *trans, /* special case for a file losing all its file extent */ if (!found) { ret = btrfs_punch_hole(trans, root, rec->ino, 0, - round_up(rec->isize, root->sectorsize)); + round_up(rec->isize, + root->fs_info->sectorsize)); if (ret < 0) goto out; } @@ -3266,21 +3401,6 @@ static int check_inode_recs(struct btrfs_root *root, } /* - * We need to record the highest inode number for later 'lost+found' - * dir creation. - * We must select an ino not used/referred by any existing inode, or - * 'lost+found' ino may be a missing ino in a corrupted leaf, - * this may cause 'lost+found' dir has wrong nlinks. - */ - cache = last_cache_extent(inode_cache); - if (cache) { - node = container_of(cache, struct ptr_node, cache); - rec = node->data; - if (rec->ino > root->highest_inode) - root->highest_inode = rec->ino; - } - - /* * We need to repair backrefs first because we could change some of the * errors in the inode recs. 
* @@ -3817,8 +3937,7 @@ static int repair_btree(struct btrfs_root *root, path.slots[level]); /* Remove the ptr */ - ret = btrfs_del_ptr(trans, root, &path, level, - path.slots[level]); + ret = btrfs_del_ptr(root, &path, level, path.slots[level]); if (ret < 0) goto out; /* @@ -3826,9 +3945,9 @@ static int repair_btree(struct btrfs_root *root, * return value is not concerned. */ btrfs_release_path(&path); - ret = btrfs_free_extent(trans, root, offset, root->nodesize, - 0, root->root_key.objectid, - level - 1, 0); + ret = btrfs_free_extent(trans, root, offset, + root->fs_info->nodesize, 0, + root->root_key.objectid, level - 1, 0); cache = next_cache_extent(cache); } @@ -4018,7 +4137,7 @@ static int fs_root_objectid(u64 objectid) return is_fstree(objectid); } -static int check_fs_roots(struct btrfs_root *root, +static int check_fs_roots(struct btrfs_fs_info *fs_info, struct cache_tree *root_cache) { struct btrfs_path path; @@ -4026,7 +4145,7 @@ static int check_fs_roots(struct btrfs_root *root, struct walk_control wc; struct extent_buffer *leaf, *tree_node; struct btrfs_root *tmp_root; - struct btrfs_root *tree_root = root->fs_info->tree_root; + struct btrfs_root *tree_root = fs_info->tree_root; int ret; int err = 0; @@ -4040,7 +4159,7 @@ static int check_fs_roots(struct btrfs_root *root, * reflected into the free space cache yet. 
*/ if (repair) - reset_cached_block_groups(root->fs_info); + reset_cached_block_groups(fs_info); memset(&wc, 0, sizeof(wc)); cache_tree_init(&wc.shared); btrfs_init_path(&path); @@ -4076,11 +4195,11 @@ again: fs_root_objectid(key.objectid)) { if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) { tmp_root = btrfs_read_fs_root_no_cache( - root->fs_info, &key); + fs_info, &key); } else { key.offset = (u64)-1; tmp_root = btrfs_read_fs_root( - root->fs_info, &key); + fs_info, &key); } if (IS_ERR(tmp_root)) { err = 1; @@ -4215,16 +4334,22 @@ static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key, if (imode_to_type(mode) != filetype) goto next; - if (name_len <= BTRFS_NAME_LEN) { - len = name_len; - } else { - len = BTRFS_NAME_LEN; + if (cur + sizeof(*di) + name_len > total || + name_len > BTRFS_NAME_LEN) { warning("root %llu %s[%llu %llu] name too long %u, trimmed", - root->objectid, - key->type == BTRFS_DIR_ITEM_KEY ? - "DIR_ITEM" : "DIR_INDEX", - key->objectid, key->offset, name_len); + root->objectid, + key->type == BTRFS_DIR_ITEM_KEY ? 
+ "DIR_ITEM" : "DIR_INDEX", + key->objectid, key->offset, name_len); + + if (cur + sizeof(*di) > total) + break; + len = min_t(u32, total - cur - sizeof(*di), + BTRFS_NAME_LEN); + } else { + len = name_len; } + read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len); if (len != namelen || strncmp(namebuf, name, len)) goto next; @@ -4285,12 +4410,16 @@ next: index = btrfs_inode_ref_index(node, ref); name_len = btrfs_inode_ref_name_len(node, ref); - if (name_len <= BTRFS_NAME_LEN) { - len = name_len; - } else { - len = BTRFS_NAME_LEN; + if (cur + sizeof(*ref) + name_len > total || + name_len > BTRFS_NAME_LEN) { warning("root %llu INODE_REF[%llu %llu] name too long", root->objectid, ref_key->objectid, ref_key->offset); + + if (total < cur + sizeof(*ref)) + goto out; + len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN); + } else { + len = name_len; } read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len); @@ -4323,6 +4452,7 @@ next: if (cur < total) goto next; +out: return err; } @@ -4460,16 +4590,22 @@ static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key, if (index != (u64)-1 && index != ref_index) goto next_ref; - if (ref_namelen <= BTRFS_NAME_LEN) { - len = ref_namelen; - } else { - len = BTRFS_NAME_LEN; + if (cur + sizeof(*ref) + ref_namelen > total || + ref_namelen > BTRFS_NAME_LEN) { warning("root %llu INODE %s[%llu %llu] name too long", root->objectid, key->type == BTRFS_INODE_REF_KEY ? 
"REF" : "EXTREF", key->objectid, key->offset); + + if (cur + sizeof(*ref) > total) + break; + len = min_t(u32, total - cur - sizeof(*ref), + BTRFS_NAME_LEN); + } else { + len = ref_namelen; } + read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1), len); @@ -4601,21 +4737,35 @@ static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key, key->objectid, key->offset, data_len); name_len = btrfs_dir_name_len(node, di); - if (name_len <= BTRFS_NAME_LEN) { - len = name_len; - } else { - len = BTRFS_NAME_LEN; + if (cur + sizeof(*di) + name_len > total || + name_len > BTRFS_NAME_LEN) { warning("root %llu %s[%llu %llu] name too long", root->objectid, key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX", key->objectid, key->offset); + + if (cur + sizeof(*di) > total) + break; + len = min_t(u32, total - cur - sizeof(*di), + BTRFS_NAME_LEN); + } else { + len = name_len; } (*size) += name_len; read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len); filetype = btrfs_dir_type(node, di); + if (key->type == BTRFS_DIR_ITEM_KEY && + key->offset != btrfs_name_hash(namebuf, len)) { + err |= -EIO; + error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu", + root->objectid, key->objectid, key->offset, + namebuf, len, filetype, key->offset, + btrfs_name_hash(namebuf, len)); + } + btrfs_init_path(&path); btrfs_dir_item_key_to_cpu(node, di, &location); @@ -4695,25 +4845,41 @@ static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey, u64 disk_bytenr; u64 disk_num_bytes; u64 extent_num_bytes; - u64 found; + u64 extent_offset; + u64 csum_found; /* In byte size, sectorsize aligned */ + u64 search_start; /* Logical range start we search for csum */ + u64 search_len; /* Logical range len we search for csum */ unsigned int extent_type; unsigned int is_hole; + int compressed = 0; int ret; int err = 0; fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item); + /* Check inline 
extent */ extent_type = btrfs_file_extent_type(node, fi); - /* Skip if file extent is inline */ if (extent_type == BTRFS_FILE_EXTENT_INLINE) { struct btrfs_item *e = btrfs_item_nr(slot); u32 item_inline_len; item_inline_len = btrfs_file_extent_inline_item_len(node, e); extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi); - if (extent_num_bytes == 0 || - extent_num_bytes != item_inline_len) + compressed = btrfs_file_extent_compression(node, fi); + if (extent_num_bytes == 0) { + error( + "root %llu EXTENT_DATA[%llu %llu] has empty inline extent", + root->objectid, fkey->objectid, fkey->offset); + err |= FILE_EXTENT_ERROR; + } + if (!compressed && extent_num_bytes != item_inline_len) { + error( + "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u", + root->objectid, fkey->objectid, fkey->offset, + extent_num_bytes, item_inline_len); err |= FILE_EXTENT_ERROR; + } + *end += extent_num_bytes; *size += extent_num_bytes; return err; } @@ -4731,32 +4897,49 @@ static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey, disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi); disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi); extent_num_bytes = btrfs_file_extent_num_bytes(node, fi); + extent_offset = btrfs_file_extent_offset(node, fi); + compressed = btrfs_file_extent_compression(node, fi); is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0); - /* Check EXTENT_DATA datasum */ - ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found); - if (found > 0 && nodatasum) { + /* + * Check EXTENT_DATA csum + * + * For plain (uncompressed) extent, we should only check the range + * we're referring to, as it's possible that part of prealloc extent + * has been written, and has csum: + * + * |<--- Original large preallocated extent A ---->| + * |<- Prealloc File Extent ->|<- Regular Extent ->| + * No csum Has csum + * + * For compressed extent, we should check the whole range. 
+ */ + if (!compressed) { + search_start = disk_bytenr + extent_offset; + search_len = extent_num_bytes; + } else { + search_start = disk_bytenr; + search_len = disk_num_bytes; + } + ret = count_csum_range(root, search_start, search_len, &csum_found); + if (csum_found > 0 && nodatasum) { err |= ODD_CSUM_ITEM; error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum", root->objectid, fkey->objectid, fkey->offset); } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum && - !is_hole && - (ret < 0 || found == 0 || found < disk_num_bytes)) { + !is_hole && (ret < 0 || csum_found < search_len)) { err |= CSUM_ITEM_MISSING; - error("root %llu EXTENT_DATA[%llu %llu] datasum missing", - root->objectid, fkey->objectid, fkey->offset); - } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) { + error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu", + root->objectid, fkey->objectid, fkey->offset, + csum_found, search_len); + } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) { err |= ODD_CSUM_ITEM; - error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum", - root->objectid, fkey->objectid, fkey->offset); + error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu", + root->objectid, fkey->objectid, fkey->offset, csum_found); } /* Check EXTENT_DATA hole */ - if (no_holes && is_hole) { - err |= FILE_EXTENT_ERROR; - error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole", - root->objectid, fkey->objectid, fkey->offset); - } else if (!no_holes && *end != fkey->offset) { + if (!no_holes && *end != fkey->offset) { err |= FILE_EXTENT_ERROR; error("root %llu EXTENT_DATA[%llu %llu] interrupt", root->objectid, fkey->objectid, fkey->offset); @@ -4897,9 +5080,10 @@ out: * Just a warning, as dir inode nbytes is just an * instructive value. 
*/ - if (!IS_ALIGNED(nbytes, root->nodesize)) { + if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) { warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u", - root->objectid, inode_id, root->nodesize); + root->objectid, inode_id, + root->fs_info->nodesize); } if (isize != size) { @@ -4939,17 +5123,26 @@ static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref) int err = 0; int ret; - btrfs_init_path(&path); key.objectid = BTRFS_FIRST_FREE_OBJECTID; key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; + /* For root being dropped, we don't need to check first inode */ + if (btrfs_root_refs(&root->root_item) == 0 && + btrfs_disk_key_objectid(&root->root_item.drop_progress) >= + key.objectid) + return 0; + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); if (ret < 0) goto out; if (ret > 0) { ret = 0; err |= INODE_ITEM_MISSING; + error("first inode item of root %llu is missing", + root->objectid); } err |= check_inode_item(root, &path, ext_ref); @@ -4961,6 +5154,65 @@ out: return ret; } +static struct tree_backref *find_tree_backref(struct extent_record *rec, + u64 parent, u64 root) +{ + struct rb_node *node; + struct tree_backref *back = NULL; + struct tree_backref match = { + .node = { + .is_data = 0, + }, + }; + + if (parent) { + match.parent = parent; + match.node.full_backref = 1; + } else { + match.root = root; + } + + node = rb_search(&rec->backref_tree, &match.node.node, + (rb_compare_keys)compare_extent_backref, NULL); + if (node) + back = to_tree_backref(rb_node_to_extent_backref(node)); + + return back; +} + +static struct data_backref *find_data_backref(struct extent_record *rec, + u64 parent, u64 root, + u64 owner, u64 offset, + int found_ref, + u64 disk_bytenr, u64 bytes) +{ + struct rb_node *node; + struct data_backref *back = NULL; + struct data_backref match = { + .node = { + .is_data = 1, + }, + .owner = owner, + .offset = offset, + .bytes = bytes, + .found_ref = found_ref, + .disk_bytenr 
= disk_bytenr, + }; + + if (parent) { + match.parent = parent; + match.node.full_backref = 1; + } else { + match.root = root; + } + + node = rb_search(&rec->backref_tree, &match.node.node, + (rb_compare_keys)compare_extent_backref, NULL); + if (node) + back = to_data_backref(rb_node_to_extent_backref(node)); + + return back; +} /* * Iterate all item on the tree and call check_inode_item() to check. * @@ -4975,8 +5227,9 @@ static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref) struct btrfs_path path; struct node_refs nrefs; struct btrfs_root_item *root_item = &root->root_item; - int ret, wret; + int ret; int level; + int err = 0; /* * We need to manually check the first inode item(256) @@ -5010,17 +5263,21 @@ static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref) } while (1) { - wret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref); - if (wret < 0) - ret = wret; - if (wret != 0) + ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref); + err |= !!ret; + + /* if ret is negative, walk shall stop */ + if (ret < 0) { + ret = err; break; + } - wret = walk_up_tree_v2(root, &path, &level); - if (wret < 0) - ret = wret; - if (wret != 0) + ret = walk_up_tree_v2(root, &path, &level); + if (ret != 0) { + /* Normal exit, reset ret to err */ + ret = err; break; + } } out: @@ -5131,7 +5388,7 @@ static int check_fs_roots_v2(struct btrfs_fs_info *fs_info) { struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *cur_root = NULL; - struct btrfs_path *path; + struct btrfs_path path; struct btrfs_key key; struct extent_buffer *node; unsigned int ext_ref; @@ -5141,15 +5398,12 @@ static int check_fs_roots_v2(struct btrfs_fs_info *fs_info) ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF); - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - + btrfs_init_path(&path); key.objectid = BTRFS_FS_TREE_OBJECTID; key.offset = 0; key.type = BTRFS_ROOT_ITEM_KEY; - ret = btrfs_search_slot(NULL, tree_root, &key, path, 
0, 0); + ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0); if (ret < 0) { err = ret; goto out; @@ -5159,8 +5413,8 @@ static int check_fs_roots_v2(struct btrfs_fs_info *fs_info) } while (1) { - node = path->nodes[0]; - slot = path->slots[0]; + node = path.nodes[0]; + slot = path.slots[0]; btrfs_item_key_to_cpu(node, &key, slot); if (key.objectid > BTRFS_LAST_FREE_OBJECTID) goto out; @@ -5192,7 +5446,7 @@ static int check_fs_roots_v2(struct btrfs_fs_info *fs_info) err |= ret; } next: - ret = btrfs_next_item(tree_root, path); + ret = btrfs_next_item(tree_root, &path); if (ret > 0) goto out; if (ret < 0) { @@ -5202,29 +5456,42 @@ next: } out: - btrfs_free_path(path); + btrfs_release_path(&path); return err; } +static int do_check_fs_roots(struct btrfs_fs_info *fs_info, + struct cache_tree *root_cache) +{ + int ret; + + if (!ctx.progress_enabled) + fprintf(stderr, "checking fs roots\n"); + if (check_mode == CHECK_MODE_LOWMEM) + ret = check_fs_roots_v2(fs_info); + else + ret = check_fs_roots(fs_info, root_cache); + + return ret; +} + static int all_backpointers_checked(struct extent_record *rec, int print_errs) { - struct list_head *cur = rec->backrefs.next; - struct extent_backref *back; + struct extent_backref *back, *tmp; struct tree_backref *tback; struct data_backref *dback; u64 found = 0; int err = 0; - while(cur != &rec->backrefs) { - back = to_extent_backref(cur); - cur = cur->next; + rbtree_postorder_for_each_entry_safe(back, tmp, + &rec->backref_tree, node) { if (!back->found_extent_tree) { err = 1; if (!print_errs) goto out; if (back->is_data) { dback = to_data_backref(back); - fprintf(stderr, "Backref %llu %s %llu" + fprintf(stderr, "Data backref %llu %s %llu" " owner %llu offset %llu num_refs %lu" " not found in extent tree\n", (unsigned long long)rec->start, @@ -5238,7 +5505,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs) (unsigned long)dback->num_refs); } else { tback = to_tree_backref(back); - fprintf(stderr, 
"Backref %llu parent %llu" + fprintf(stderr, "Tree backref %llu parent %llu" " root %llu not found in extent tree\n", (unsigned long long)rec->start, (unsigned long long)tback->parent, @@ -5320,21 +5587,19 @@ out: return err; } -static int free_all_extent_backrefs(struct extent_record *rec) +static void __free_one_backref(struct rb_node *node) { - struct extent_backref *back; - struct list_head *cur; - while (!list_empty(&rec->backrefs)) { - cur = rec->backrefs.next; - back = to_extent_backref(cur); - list_del(cur); - free(back); - } - return 0; + struct extent_backref *back = rb_node_to_extent_backref(node); + + free(back); +} + +static void free_all_extent_backrefs(struct extent_record *rec) +{ + rb_free_nodes(&rec->backref_tree, __free_one_backref); } -static void free_extent_record_cache(struct btrfs_fs_info *fs_info, - struct cache_tree *extent_cache) +static void free_extent_record_cache(struct cache_tree *extent_cache) { struct cache_extent *cache; struct extent_record *rec; @@ -5370,7 +5635,7 @@ static int check_owner_ref(struct btrfs_root *root, struct extent_record *rec, struct extent_buffer *buf) { - struct extent_backref *node; + struct extent_backref *node, *tmp; struct tree_backref *back; struct btrfs_root *ref_root; struct btrfs_key key; @@ -5380,7 +5645,8 @@ static int check_owner_ref(struct btrfs_root *root, int found = 0; int ret; - list_for_each_entry(node, &rec->backrefs, list) { + rbtree_postorder_for_each_entry_safe(node, tmp, + &rec->backref_tree, node) { if (node->is_data) continue; if (!node->found_ref) @@ -5425,14 +5691,12 @@ static int check_owner_ref(struct btrfs_root *root, static int is_extent_tree_record(struct extent_record *rec) { - struct list_head *cur = rec->backrefs.next; - struct extent_backref *node; + struct extent_backref *node, *tmp; struct tree_backref *back; int is_extent = 0; - while(cur != &rec->backrefs) { - node = to_extent_backref(cur); - cur = cur->next; + rbtree_postorder_for_each_entry_safe(node, tmp, + 
&rec->backref_tree, node) { if (node->is_data) return 0; back = to_tree_backref(node); @@ -5533,9 +5797,7 @@ static int swap_values(struct btrfs_root *root, struct btrfs_path *path, return 0; } -static int fix_key_order(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path) +static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path) { struct extent_buffer *buf; struct btrfs_key k1, k2; @@ -5563,8 +5825,7 @@ static int fix_key_order(struct btrfs_trans_handle *trans, return ret; } -static int delete_bogus_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int delete_bogus_item(struct btrfs_root *root, struct btrfs_path *path, struct extent_buffer *buf, int slot) { @@ -5599,9 +5860,7 @@ static int delete_bogus_item(struct btrfs_trans_handle *trans, return 0; } -static int fix_item_offset(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path) +static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path) { struct extent_buffer *buf; int i; @@ -5618,8 +5877,7 @@ again: BTRFS_LEAF_DATA_SIZE(root)) { if (btrfs_item_end_nr(buf, i) > BTRFS_LEAF_DATA_SIZE(root)) { - ret = delete_bogus_item(trans, root, path, - buf, i); + ret = delete_bogus_item(root, path, buf, i); if (!ret) goto again; fprintf(stderr, "item is off the end of the " @@ -5633,8 +5891,7 @@ again: btrfs_item_offset_nr(buf, i - 1)) { if (btrfs_item_end_nr(buf, i) > btrfs_item_offset_nr(buf, i - 1)) { - ret = delete_bogus_item(trans, root, path, - buf, i); + ret = delete_bogus_item(root, path, buf, i); if (!ret) goto again; fprintf(stderr, "items overlap, can't fix\n"); @@ -5726,9 +5983,9 @@ static int try_to_fix_bad_block(struct btrfs_root *root, break; } if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER) - ret = fix_key_order(trans, search_root, &path); + ret = fix_key_order(search_root, &path); else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS) - ret = fix_item_offset(trans, search_root, &path); 
+ ret = fix_item_offset(search_root, &path); if (ret) { btrfs_commit_transaction(trans, search_root); break; @@ -5804,6 +6061,7 @@ static int check_block(struct btrfs_root *root, return ret; } +#if 0 static struct tree_backref *find_tree_backref(struct extent_record *rec, u64 parent, u64 root) { @@ -5831,6 +6089,7 @@ static struct tree_backref *find_tree_backref(struct extent_record *rec, } return NULL; } +#endif static struct tree_backref *alloc_tree_backref(struct extent_record *rec, u64 parent, u64 root) @@ -5847,11 +6106,11 @@ static struct tree_backref *alloc_tree_backref(struct extent_record *rec, ref->root = root; ref->node.full_backref = 0; } - list_add_tail(&ref->node.list, &rec->backrefs); return ref; } +#if 0 static struct data_backref *find_data_backref(struct extent_record *rec, u64 parent, u64 root, u64 owner, u64 offset, @@ -5888,6 +6147,7 @@ static struct data_backref *find_data_backref(struct extent_record *rec, } return NULL; } +#endif static struct data_backref *alloc_data_backref(struct extent_record *rec, u64 parent, u64 root, @@ -5915,7 +6175,6 @@ static struct data_backref *alloc_data_backref(struct extent_record *rec, ref->bytes = max_size; ref->found_ref = 0; ref->num_refs = 0; - list_add_tail(&ref->node.list, &rec->backrefs); if (max_size > rec->max_size) rec->max_size = max_size; return ref; @@ -5948,12 +6207,12 @@ static void check_extent_type(struct extent_record *rec) * Check SYSTEM extent, as it's also marked as metadata, we can only * make sure it's a SYSTEM extent by its backref */ - if (!list_empty(&rec->backrefs)) { + if (!RB_EMPTY_ROOT(&rec->backref_tree)) { struct extent_backref *node; struct tree_backref *tback; u64 bg_type; - node = to_extent_backref(rec->backrefs.next); + node = rb_node_to_extent_backref(rb_first(&rec->backref_tree)); if (node->is_data) { /* tree block shouldn't have data backref */ rec->wrong_chunk_type = 1; @@ -5981,6 +6240,7 @@ static int add_extent_rec_nolookup(struct cache_tree *extent_cache, struct 
extent_record *rec; int ret = 0; + BUG_ON(tmpl->max_size == 0); rec = malloc(sizeof(*rec)); if (!rec) return -ENOMEM; @@ -6003,6 +6263,7 @@ static int add_extent_rec_nolookup(struct cache_tree *extent_cache, INIT_LIST_HEAD(&rec->backrefs); INIT_LIST_HEAD(&rec->dups); INIT_LIST_HEAD(&rec->list); + rec->backref_tree = RB_ROOT; memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key)); rec->cache.start = tmpl->start; rec->cache.size = tmpl->nr; @@ -6015,7 +6276,7 @@ static int add_extent_rec_nolookup(struct cache_tree *extent_cache, if (tmpl->metadata) rec->crossing_stripes = check_crossing_stripes(global_info, - rec->start, global_info->tree_root->nodesize); + rec->start, global_info->nodesize); check_extent_type(rec); return ret; } @@ -6117,7 +6378,7 @@ static int add_extent_rec(struct cache_tree *extent_cache, if (tmpl->metadata) rec->crossing_stripes = check_crossing_stripes( global_info, rec->start, - global_info->tree_root->nodesize); + global_info->nodesize); check_extent_type(rec); maybe_free_extent_rec(extent_cache, rec); return ret; @@ -6135,6 +6396,7 @@ static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr, struct tree_backref *back; struct cache_extent *cache; int ret; + bool insert = false; cache = lookup_cache_extent(extent_cache, bytenr, 1); if (!cache) { @@ -6144,6 +6406,7 @@ static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr, tmpl.start = bytenr; tmpl.nr = 1; tmpl.metadata = 1; + tmpl.max_size = 1; ret = add_extent_rec_nolookup(extent_cache, &tmpl); if (ret) @@ -6168,6 +6431,7 @@ static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr, back = alloc_tree_backref(rec, parent, root); if (!back) return -ENOMEM; + insert = true; } if (found_ref) { @@ -6189,6 +6453,9 @@ static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr, } back->node.found_extent_tree = 1; } + if (insert) + WARN_ON(rb_insert(&rec->backref_tree, &back->node.node, + compare_extent_backref)); 
check_extent_type(rec); maybe_free_extent_rec(extent_cache, rec); return 0; @@ -6202,6 +6469,7 @@ static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr, struct data_backref *back; struct cache_extent *cache; int ret; + bool insert = false; cache = lookup_cache_extent(extent_cache, bytenr, 1); if (!cache) { @@ -6241,6 +6509,7 @@ static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr, back = alloc_data_backref(rec, parent, root, owner, offset, max_size); BUG_ON(!back); + insert = true; } if (found_ref) { @@ -6249,8 +6518,16 @@ static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr, BUG_ON(back->bytes != max_size); back->node.found_ref = 1; back->found_ref += 1; - back->bytes = max_size; - back->disk_bytenr = bytenr; + if (back->bytes != max_size || back->disk_bytenr != bytenr) { + back->bytes = max_size; + back->disk_bytenr = bytenr; + + /* Need to reinsert if not already in the tree */ + if (!insert) { + rb_erase(&back->node.node, &rec->backref_tree); + insert = true; + } + } rec->refs += 1; rec->content_checked = 1; rec->owner_ref_checked = 1; @@ -6269,6 +6546,10 @@ static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr, back->num_refs = num_refs; back->node.found_extent_tree = 1; } + if (insert) + WARN_ON(rb_insert(&rec->backref_tree, &back->node.node, + compare_extent_backref)); + maybe_free_extent_rec(extent_cache, rec); return 0; } @@ -6531,7 +6812,7 @@ static int process_chunk_item(struct cache_tree *chunk_cache, * wrong onwer(3) out of chunk tree, to pass both chunk tree check * and owner<->key_type check. 
*/ - ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot, + ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot, key->offset); if (ret < 0) { error("chunk(%llu, %llu) is not valid, ignore it", @@ -6713,14 +6994,14 @@ static int process_extent_item(struct btrfs_root *root, if (key.type == BTRFS_METADATA_ITEM_KEY) { metadata = 1; - num_bytes = root->nodesize; + num_bytes = root->fs_info->nodesize; } else { num_bytes = key.offset; } - if (!IS_ALIGNED(key.objectid, root->sectorsize)) { + if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) { error("ignoring invalid extent, bytenr %llu is not aligned to %u", - key.objectid, root->sectorsize); + key.objectid, root->fs_info->sectorsize); return -EIO; } if (item_size < sizeof(*ei)) { @@ -6749,14 +7030,14 @@ static int process_extent_item(struct btrfs_root *root, metadata = 1; else metadata = 0; - if (metadata && num_bytes != root->nodesize) { + if (metadata && num_bytes != root->fs_info->nodesize) { error("ignore invalid metadata extent, length %llu does not equal to %u", - num_bytes, root->nodesize); + num_bytes, root->fs_info->nodesize); return -EIO; } - if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) { + if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) { error("ignore invalid data extent, length %llu is not aligned to %u", - num_bytes, root->sectorsize); + num_bytes, root->fs_info->sectorsize); return -EIO; } @@ -6784,14 +7065,16 @@ static int process_extent_item(struct btrfs_root *root, ret = add_tree_backref(extent_cache, key.objectid, 0, offset, 0); if (ret < 0) - error("add_tree_backref failed: %s", + error( + "add_tree_backref failed (extent items tree block): %s", strerror(-ret)); break; case BTRFS_SHARED_BLOCK_REF_KEY: ret = add_tree_backref(extent_cache, key.objectid, offset, 0, 0); if (ret < 0) - error("add_tree_backref failed: %s", + error( + "add_tree_backref failed (extent items shared block): %s", strerror(-ret)); break; case 
BTRFS_EXTENT_DATA_REF_KEY: @@ -6835,7 +7118,7 @@ static int check_cache_range(struct btrfs_root *root, for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { bytenr = btrfs_sb_offset(i); - ret = btrfs_rmap_block(&root->fs_info->mapping_tree, + ret = btrfs_rmap_block(root->fs_info, cache->key.objectid, bytenr, 0, &logical, &nr, &stripe_len); if (ret) @@ -6962,7 +7245,7 @@ static int verify_space_cache(struct btrfs_root *root, if (key.type == BTRFS_EXTENT_ITEM_KEY) last = key.objectid + key.offset; else - last = key.objectid + root->nodesize; + last = key.objectid + root->fs_info->nodesize; path.slots[0]++; continue; } @@ -6974,7 +7257,7 @@ static int verify_space_cache(struct btrfs_root *root, if (key.type == BTRFS_EXTENT_ITEM_KEY) last = key.objectid + key.offset; else - last = key.objectid + root->nodesize; + last = key.objectid + root->fs_info->nodesize; path.slots[0]++; } @@ -7024,7 +7307,7 @@ static int check_space_cache(struct btrfs_root *root) start = cache->key.objectid + cache->key.offset; if (!cache->free_space_ctl) { if (btrfs_init_free_space_ctl(cache, - root->sectorsize)) { + root->fs_info->sectorsize)) { ret = -ENOMEM; break; } @@ -7072,8 +7355,9 @@ static int check_extent_csums(struct btrfs_root *root, u64 bytenr, u64 num_bytes, unsigned long leaf_offset, struct extent_buffer *eb) { + struct btrfs_fs_info *fs_info = root->fs_info; u64 offset = 0; - u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); + u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); char *data; unsigned long csum_offset; u32 csum; @@ -7085,7 +7369,7 @@ static int check_extent_csums(struct btrfs_root *root, u64 bytenr, int mirror; int num_copies; - if (num_bytes % root->sectorsize) + if (num_bytes % fs_info->sectorsize) return -EINVAL; data = malloc(num_bytes); @@ -7097,7 +7381,7 @@ static int check_extent_csums(struct btrfs_root *root, u64 bytenr, again: read_len = num_bytes - offset; /* read as much space once a time */ - ret = read_extent_data(root, data + offset, + 
ret = read_extent_data(fs_info, data + offset, bytenr + offset, &read_len, mirror); if (ret) goto out; @@ -7107,12 +7391,12 @@ again: csum = ~(u32)0; tmp = offset + data_checked; - csum = btrfs_csum_data(NULL, (char *)data + tmp, - csum, root->sectorsize); + csum = btrfs_csum_data((char *)data + tmp, + csum, fs_info->sectorsize); btrfs_csum_final(csum, (u8 *)&csum); csum_offset = leaf_offset + - tmp / root->sectorsize * csum_size; + tmp / fs_info->sectorsize * csum_size; read_extent_buffer(eb, (char *)&csum_expected, csum_offset, csum_size); /* try another mirror */ @@ -7120,15 +7404,14 @@ again: fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n", mirror, bytenr + tmp, csum, csum_expected); - num_copies = btrfs_num_copies( - &root->fs_info->mapping_tree, + num_copies = btrfs_num_copies(root->fs_info, bytenr, num_bytes); if (mirror < num_copies - 1) { mirror += 1; goto again; } } - data_checked += root->sectorsize; + data_checked += fs_info->sectorsize; } offset += read_len; } @@ -7329,7 +7612,7 @@ static int check_csums(struct btrfs_root *root) } data_len = (btrfs_item_size_nr(leaf, path.slots[0]) / - csum_size) * root->sectorsize; + csum_size) * root->fs_info->sectorsize; if (!check_data_csum) goto skip_csum_check; leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]); @@ -7394,8 +7677,7 @@ static int is_dropped_key(struct btrfs_key *key, * assumption and simply indicate that we _think_ that the FULL BACKREF needs to * be set or not and then we can check later once we've gathered all the refs. 
*/ -static int calc_extent_flag(struct btrfs_root *root, - struct cache_tree *extent_cache, +static int calc_extent_flag(struct cache_tree *extent_cache, struct extent_buffer *buf, struct root_item_record *ri, u64 *flags) @@ -7515,6 +7797,7 @@ static int run_next_block(struct btrfs_root *root, struct device_extent_tree *dev_extent_cache, struct root_item_record *ri) { + struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *buf; struct extent_record *rec = NULL; u64 bytenr; @@ -7544,8 +7827,7 @@ static int run_next_block(struct btrfs_root *root, continue; /* fixme, get the parent transid */ - readahead_tree_block(root, bits[i].start, - bits[i].size, 0); + readahead_tree_block(fs_info, bits[i].start, 0); } } *last = bits[0].start; @@ -7574,7 +7856,7 @@ static int run_next_block(struct btrfs_root *root, } /* fixme, get the real parent transid */ - buf = read_tree_block(root, bytenr, size, gen); + buf = read_tree_block(root->fs_info, bytenr, gen); if (!extent_buffer_uptodate(buf)) { record_bad_block_io(root->fs_info, extent_cache, bytenr, size); @@ -7589,7 +7871,7 @@ static int run_next_block(struct btrfs_root *root, btrfs_header_level(buf), 1, NULL, &flags); if (ret < 0) { - ret = calc_extent_flag(root, extent_cache, buf, ri, &flags); + ret = calc_extent_flag(extent_cache, buf, ri, &flags); if (ret < 0) { fprintf(stderr, "Couldn't calc extent flags\n"); flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; @@ -7597,7 +7879,7 @@ static int run_next_block(struct btrfs_root *root, } } else { flags = 0; - ret = calc_extent_flag(root, extent_cache, buf, ri, &flags); + ret = calc_extent_flag(extent_cache, buf, ri, &flags); if (ret < 0) { fprintf(stderr, "Couldn't calc extent flags\n"); flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; @@ -7706,7 +7988,8 @@ static int run_next_block(struct btrfs_root *root, ret = add_tree_backref(extent_cache, key.objectid, 0, key.offset, 0); if (ret < 0) - error("add_tree_backref failed: %s", + error( + "add_tree_backref failed (leaf tree block): 
%s", strerror(-ret)); continue; } @@ -7714,7 +7997,8 @@ static int run_next_block(struct btrfs_root *root, ret = add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0); if (ret < 0) - error("add_tree_backref failed: %s", + error( + "add_tree_backref failed (leaf shared block): %s", strerror(-ret)); continue; } @@ -7729,7 +8013,7 @@ static int run_next_block(struct btrfs_root *root, ref), btrfs_extent_data_ref_offset(buf, ref), btrfs_extent_data_ref_count(buf, ref), - 0, root->sectorsize); + 0, root->fs_info->sectorsize); continue; } if (key.type == BTRFS_SHARED_DATA_REF_KEY) { @@ -7739,7 +8023,7 @@ static int run_next_block(struct btrfs_root *root, add_data_backref(extent_cache, key.objectid, key.offset, 0, 0, 0, btrfs_shared_data_ref_count(buf, ref), - 0, root->sectorsize); + 0, root->fs_info->sectorsize); continue; } if (key.type == BTRFS_ORPHAN_ITEM_KEY) { @@ -7771,7 +8055,7 @@ static int run_next_block(struct btrfs_root *root, data_bytes_allocated += btrfs_file_extent_disk_num_bytes(buf, fi); - if (data_bytes_allocated < root->sectorsize) { + if (data_bytes_allocated < root->fs_info->sectorsize) { abort(); } data_bytes_referenced += @@ -7795,7 +8079,7 @@ static int run_next_block(struct btrfs_root *root, struct extent_record tmpl; ptr = btrfs_node_blockptr(buf, i); - size = root->nodesize; + size = root->fs_info->nodesize; btrfs_node_key_to_cpu(buf, &key, i); if (ri != NULL) { if ((level == ri->drop_level) @@ -7819,7 +8103,8 @@ static int run_next_block(struct btrfs_root *root, ret = add_tree_backref(extent_cache, ptr, parent, owner, 1); if (ret < 0) { - error("add_tree_backref failed: %s", + error( + "add_tree_backref failed (non-leaf block): %s", strerror(-ret)); continue; } @@ -7838,11 +8123,6 @@ static int run_next_block(struct btrfs_root *root, total_fs_tree_bytes += buf->len; if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) total_extent_tree_bytes += buf->len; - if (!found_old_backref && - btrfs_header_owner(buf) == 
BTRFS_TREE_RELOC_OBJECTID && - btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV && - !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) - found_old_backref = 1; out: free_extent_buffer(buf); return ret; @@ -7925,7 +8205,7 @@ static int free_extent_hook(struct btrfs_trans_handle *trans, back->node.found_extent_tree = 0; if (!back->node.found_extent_tree && back->node.found_ref) { - list_del(&back->node.list); + rb_erase(&back->node.node, &rec->backref_tree); free(back); } } else { @@ -7944,7 +8224,7 @@ static int free_extent_hook(struct btrfs_trans_handle *trans, back->node.found_extent_tree = 0; } if (!back->node.found_extent_tree && back->node.found_ref) { - list_del(&back->node.list); + rb_erase(&back->node.node, &rec->backref_tree); free(back); } } @@ -7956,7 +8236,7 @@ out: static int delete_extent_records(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - u64 bytenr, u64 new_len) + u64 bytenr) { struct btrfs_key key; struct btrfs_key found_key; @@ -8020,7 +8300,7 @@ static int delete_extent_records(struct btrfs_trans_handle *trans, if (found_key.type == BTRFS_EXTENT_ITEM_KEY || found_key.type == BTRFS_METADATA_ITEM_KEY) { u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ? 
- found_key.offset : root->nodesize; + found_key.offset : root->fs_info->nodesize; ret = btrfs_update_block_group(trans, root, bytenr, bytes, 0, 0); @@ -8044,7 +8324,7 @@ static int record_extent(struct btrfs_trans_handle *trans, struct extent_backref *back, int allocated, u64 flags) { - int ret; + int ret = 0; struct btrfs_root *extent_root = info->extent_root; struct extent_buffer *leaf; struct btrfs_key ins_key; @@ -8054,7 +8334,7 @@ static int record_extent(struct btrfs_trans_handle *trans, if (!back->is_data) rec->max_size = max_t(u64, rec->max_size, - info->extent_root->nodesize); + info->nodesize); if (!allocated) { u32 item_size = sizeof(*ei); @@ -8385,7 +8665,7 @@ out: static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path, struct extent_record *rec) { - struct extent_backref *back; + struct extent_backref *back, *tmp; struct data_backref *dback; struct extent_entry *entry, *best = NULL; LIST_HEAD(entries); @@ -8401,7 +8681,8 @@ static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path, if (rec->metadata) return 0; - list_for_each_entry(back, &rec->backrefs, list) { + rbtree_postorder_for_each_entry_safe(back, tmp, + &rec->backref_tree, node) { if (back->full_backref || !back->is_data) continue; @@ -8527,7 +8808,8 @@ static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path, * Ok great we all agreed on an extent record, let's go find the real * references and fix up the ones that don't match. 
*/ - list_for_each_entry(back, &rec->backrefs, list) { + rbtree_postorder_for_each_entry_safe(back, tmp, + &rec->backref_tree, node) { if (back->full_backref || !back->is_data) continue; @@ -8566,8 +8848,7 @@ out: return ret; } -static int process_duplicates(struct btrfs_root *root, - struct cache_tree *extent_cache, +static int process_duplicates(struct cache_tree *extent_cache, struct extent_record *rec) { struct extent_record *good, *tmp; @@ -8752,7 +9033,7 @@ static int find_possible_backrefs(struct btrfs_fs_info *info, struct extent_record *rec) { struct btrfs_root *root; - struct extent_backref *back; + struct extent_backref *back, *tmp; struct data_backref *dback; struct cache_extent *cache; struct btrfs_file_extent_item *fi; @@ -8760,7 +9041,8 @@ static int find_possible_backrefs(struct btrfs_fs_info *info, u64 bytenr, bytes; int ret; - list_for_each_entry(back, &rec->backrefs, list) { + rbtree_postorder_for_each_entry_safe(back, tmp, + &rec->backref_tree, node) { /* Don't care about full backrefs (poor unloved backrefs) */ if (back->full_backref || !back->is_data) continue; @@ -8848,7 +9130,7 @@ static int record_orphan_data_extents(struct btrfs_fs_info *fs_info, { struct btrfs_key key; struct btrfs_root *dest_root; - struct extent_backref *back; + struct extent_backref *back, *tmp; struct data_backref *dback; struct orphan_data_extent *orphan; struct btrfs_path path; @@ -8858,7 +9140,8 @@ static int record_orphan_data_extents(struct btrfs_fs_info *fs_info, if (rec->metadata) return 1; btrfs_init_path(&path); - list_for_each_entry(back, &rec->backrefs, list) { + rbtree_postorder_for_each_entry_safe(back, tmp, + &rec->backref_tree, node) { if (back->full_backref || !back->is_data || !back->found_extent_tree) continue; @@ -8926,9 +9209,8 @@ static int fixup_extent_refs(struct btrfs_fs_info *info, struct btrfs_trans_handle *trans = NULL; int ret; struct btrfs_path path; - struct list_head *cur = rec->backrefs.next; struct cache_extent *cache; - struct 
extent_backref *back; + struct extent_backref *back, *tmp; int allocated = 0; u64 flags = 0; @@ -8962,7 +9244,7 @@ static int fixup_extent_refs(struct btrfs_fs_info *info, /* step two, delete all the existing records */ ret = delete_extent_records(trans, info->extent_root, &path, - rec->start, rec->max_size); + rec->start); if (ret < 0) goto out; @@ -8976,10 +9258,8 @@ static int fixup_extent_refs(struct btrfs_fs_info *info, } /* step three, recreate all the refs we did find */ - while(cur != &rec->backrefs) { - back = to_extent_backref(cur); - cur = cur->next; - + rbtree_postorder_for_each_entry_safe(back, tmp, + &rec->backref_tree, node) { /* * if we didn't find any references, don't create a * new extent record @@ -9001,6 +9281,10 @@ out: ret = err; } + if (!ret) + fprintf(stderr, "Repaired extent references for %llu\n", + (unsigned long long)rec->start); + btrfs_release_path(&path); return ret; } @@ -9058,7 +9342,12 @@ static int fixup_extent_flags(struct btrfs_fs_info *fs_info, btrfs_set_extent_flags(path.nodes[0], ei, flags); btrfs_mark_buffer_dirty(path.nodes[0]); btrfs_release_path(&path); - return btrfs_commit_transaction(trans, root); + ret = btrfs_commit_transaction(trans, root); + if (!ret) + fprintf(stderr, "Repaired extent flags for %llu\n", + (unsigned long long)rec->start); + + return ret; } /* right now we only prune from the extent allocation tree */ @@ -9123,7 +9412,7 @@ again: del_ptr: printk("deleting pointer to block %Lu\n", corrupt->cache.start); - ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot); + ret = btrfs_del_ptr(info->extent_root, &path, level, slot); out: btrfs_release_path(&path); @@ -9165,8 +9454,7 @@ static void reset_cached_block_groups(struct btrfs_fs_info *fs_info) &start, &end, EXTENT_DIRTY); if (ret) break; - clear_extent_dirty(&fs_info->free_space_cache, start, end, - GFP_NOFS); + clear_extent_dirty(&fs_info->free_space_cache, start, end); } start = 0; @@ -9185,11 +9473,8 @@ static int 
check_extent_refs(struct btrfs_root *root, { struct extent_record *rec; struct cache_extent *cache; - int err = 0; int ret = 0; - int fixed = 0; int had_dups = 0; - int recorded = 0; if (repair) { /* @@ -9203,8 +9488,7 @@ static int check_extent_refs(struct btrfs_root *root, rec = container_of(cache, struct extent_record, cache); set_extent_dirty(root->fs_info->excluded_extents, rec->start, - rec->start + rec->max_size - 1, - GFP_NOFS); + rec->start + rec->max_size - 1); cache = next_cache_extent(cache); } @@ -9213,8 +9497,7 @@ static int check_extent_refs(struct btrfs_root *root, while(cache) { set_extent_dirty(root->fs_info->excluded_extents, cache->start, - cache->start + cache->size - 1, - GFP_NOFS); + cache->start + cache->size - 1); cache = next_cache_extent(cache); } prune_corrupt_blocks(root->fs_info); @@ -9239,7 +9522,7 @@ static int check_extent_refs(struct btrfs_root *root, * process_duplicates() will return 0, otherwise it will return * 1 and we */ - if (process_duplicates(root, extent_cache, rec)) + if (process_duplicates(extent_cache, rec)) continue; ret = delete_duplicate_records(root, rec); if (ret < 0) @@ -9258,9 +9541,8 @@ static int check_extent_refs(struct btrfs_root *root, while(1) { int cur_err = 0; + int fix = 0; - fixed = 0; - recorded = 0; cache = search_cache_extent(extent_cache, 0); if (!cache) break; @@ -9268,7 +9550,6 @@ static int check_extent_refs(struct btrfs_root *root, if (rec->num_duplicates) { fprintf(stderr, "extent item %llu has multiple extent " "items\n", (unsigned long long)rec->start); - err = 1; cur_err = 1; } @@ -9282,54 +9563,31 @@ static int check_extent_refs(struct btrfs_root *root, ret = record_orphan_data_extents(root->fs_info, rec); if (ret < 0) goto repair_abort; - if (ret == 0) { - recorded = 1; - } else { - /* - * we can't use the extent to repair file - * extent, let the fallback method handle it. 
- */ - if (!fixed && repair) { - ret = fixup_extent_refs( - root->fs_info, - extent_cache, rec); - if (ret) - goto repair_abort; - fixed = 1; - } - } - err = 1; + fix = ret; cur_err = 1; } if (all_backpointers_checked(rec, 1)) { fprintf(stderr, "backpointer mismatch on [%llu %llu]\n", (unsigned long long)rec->start, (unsigned long long)rec->nr); - - if (!fixed && !recorded && repair) { - ret = fixup_extent_refs(root->fs_info, - extent_cache, rec); - if (ret) - goto repair_abort; - fixed = 1; - } + fix = 1; cur_err = 1; - err = 1; } if (!rec->owner_ref_checked) { fprintf(stderr, "owner ref check failed [%llu %llu]\n", (unsigned long long)rec->start, (unsigned long long)rec->nr); - if (!fixed && !recorded && repair) { - ret = fixup_extent_refs(root->fs_info, - extent_cache, rec); - if (ret) - goto repair_abort; - fixed = 1; - } - err = 1; + fix = 1; cur_err = 1; } + + if (repair && fix) { + ret = fixup_extent_refs(root->fs_info, extent_cache, rec); + if (ret) + goto repair_abort; + } + + if (rec->bad_full_backref) { fprintf(stderr, "bad full backref, on [%llu]\n", (unsigned long long)rec->start); @@ -9337,9 +9595,8 @@ static int check_extent_refs(struct btrfs_root *root, ret = fixup_extent_flags(root->fs_info, rec); if (ret) goto repair_abort; - fixed = 1; + fix = 1; } - err = 1; cur_err = 1; } /* @@ -9351,7 +9608,6 @@ static int check_extent_refs(struct btrfs_root *root, fprintf(stderr, "bad metadata [%llu, %llu) crossing stripe boundary\n", rec->start, rec->start + rec->max_size); - err = 1; cur_err = 1; } @@ -9359,17 +9615,15 @@ static int check_extent_refs(struct btrfs_root *root, fprintf(stderr, "bad extent [%llu, %llu), type mismatch with chunk\n", rec->start, rec->start + rec->max_size); - err = 1; cur_err = 1; } remove_cache_extent(extent_cache, cache); free_all_extent_backrefs(rec); - if (!init_extent_tree && repair && (!cur_err || fixed)) + if (!init_extent_tree && repair && (!cur_err || fix)) clear_extent_dirty(root->fs_info->excluded_extents, rec->start, 
- rec->start + rec->max_size - 1, - GFP_NOFS); + rec->start + rec->max_size - 1); free(rec); } repair_abort: @@ -9387,16 +9641,16 @@ repair_abort: goto repair_abort; } - btrfs_fix_block_accounting(trans, root); + ret = btrfs_fix_block_accounting(trans, root); + if (ret) + goto repair_abort; ret = btrfs_commit_transaction(trans, root); if (ret) goto repair_abort; } - if (err) - fprintf(stderr, "repaired damaged extent references\n"); return ret; } - return err; + return 0; } u64 calc_stripe_length(u64 type, u64 length, int num_stripes) @@ -9658,7 +9912,7 @@ static int check_devices(struct rb_root *dev_cache, static int add_root_item_to_list(struct list_head *head, u64 objectid, u64 bytenr, u64 last_snapshot, u8 level, u8 drop_level, - int level_size, struct btrfs_key *drop_key) + struct btrfs_key *drop_key) { struct root_item_record *ri_rec; @@ -9668,7 +9922,6 @@ static int add_root_item_to_list(struct list_head *head, ri_rec->bytenr = bytenr; ri_rec->objectid = objectid; ri_rec->level = level; - ri_rec->level_size = level_size; ri_rec->drop_level = drop_level; ri_rec->last_snapshot = last_snapshot; if (drop_key) @@ -9713,8 +9966,7 @@ static int deal_root_from_list(struct list_head *list, rec = list_entry(list->next, struct root_item_record, list); last = 0; - buf = read_tree_block(root->fs_info->tree_root, - rec->bytenr, rec->level_size, 0); + buf = read_tree_block(root->fs_info, rec->bytenr, 0); if (!extent_buffer_uptodate(buf)) { free_extent_buffer(buf); ret = -EIO; @@ -9758,7 +10010,7 @@ static int deal_root_from_list(struct list_head *list, return ret; } -static int check_chunks_and_extents(struct btrfs_root *root) +static int check_chunks_and_extents(struct btrfs_fs_info *fs_info) { struct rb_root dev_cache; struct cache_tree chunk_cache; @@ -9783,10 +10035,11 @@ static int check_chunks_and_extents(struct btrfs_root *root) struct list_head dropping_trees; struct list_head normal_trees; struct btrfs_root *root1; + struct btrfs_root *root; u64 objectid; - u32 
level_size; u8 level; + root = fs_info->fs_root; dev_cache = RB_ROOT; cache_tree_init(&chunk_cache); block_group_tree_init(&block_group_cache); @@ -9803,10 +10056,10 @@ static int check_chunks_and_extents(struct btrfs_root *root) INIT_LIST_HEAD(&normal_trees); if (repair) { - root->fs_info->excluded_extents = &excluded_extents; - root->fs_info->fsck_extent_cache = &extent_cache; - root->fs_info->free_extent_hook = free_extent_hook; - root->fs_info->corrupt_blocks = &corrupt_blocks; + fs_info->excluded_extents = &excluded_extents; + fs_info->fsck_extent_cache = &extent_cache; + fs_info->free_extent_hook = free_extent_hook; + fs_info->corrupt_blocks = &corrupt_blocks; } bits_nr = 1024; @@ -9822,26 +10075,23 @@ static int check_chunks_and_extents(struct btrfs_root *root) } again: - root1 = root->fs_info->tree_root; + root1 = fs_info->tree_root; level = btrfs_header_level(root1->node); ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid, - root1->node->start, 0, level, 0, - root1->nodesize, NULL); + root1->node->start, 0, level, 0, NULL); if (ret < 0) goto out; - root1 = root->fs_info->chunk_root; + root1 = fs_info->chunk_root; level = btrfs_header_level(root1->node); ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid, - root1->node->start, 0, level, 0, - root1->nodesize, NULL); + root1->node->start, 0, level, 0, NULL); if (ret < 0) goto out; btrfs_init_path(&path); key.offset = 0; key.objectid = 0; key.type = BTRFS_ROOT_ITEM_KEY; - ret = btrfs_search_slot(NULL, root->fs_info->tree_root, - &key, &path, 0, 0); + ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0); if (ret < 0) goto out; while(1) { @@ -9864,17 +10114,15 @@ again: last_snapshot = btrfs_root_last_snapshot(&ri); if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) { level = btrfs_root_level(&ri); - level_size = root->nodesize; ret = add_root_item_to_list(&normal_trees, found_key.objectid, btrfs_root_bytenr(&ri), last_snapshot, level, - 0, level_size, NULL); 
+ 0, NULL); if (ret < 0) goto out; } else { level = btrfs_root_level(&ri); - level_size = root->nodesize; objectid = found_key.objectid; btrfs_disk_key_to_cpu(&found_key, &ri.drop_progress); @@ -9882,8 +10130,7 @@ again: objectid, btrfs_root_bytenr(&ri), last_snapshot, level, - ri.drop_level, - level_size, &found_key); + ri.drop_level, &found_key); if (ret < 0) goto out; } @@ -9938,12 +10185,12 @@ again: out: task_stop(ctx.info); if (repair) { - free_corrupt_blocks_tree(root->fs_info->corrupt_blocks); + free_corrupt_blocks_tree(fs_info->corrupt_blocks); extent_io_tree_cleanup(&excluded_extents); - root->fs_info->fsck_extent_cache = NULL; - root->fs_info->free_extent_hook = NULL; - root->fs_info->corrupt_blocks = NULL; - root->fs_info->excluded_extents = NULL; + fs_info->fsck_extent_cache = NULL; + fs_info->free_extent_hook = NULL; + fs_info->corrupt_blocks = NULL; + fs_info->excluded_extents = NULL; } free(bits); free_chunk_cache_tree(&chunk_cache); @@ -9954,9 +10201,11 @@ out: free_extent_cache_tree(&pending); free_extent_cache_tree(&reada); free_extent_cache_tree(&nodes); + free_root_item_list(&normal_trees); + free_root_item_list(&dropping_trees); return ret; loop: - free_corrupt_blocks_tree(root->fs_info->corrupt_blocks); + free_corrupt_blocks_tree(fs_info->corrupt_blocks); free_extent_cache_tree(&seen); free_extent_cache_tree(&pending); free_extent_cache_tree(&reada); @@ -9965,7 +10214,7 @@ loop: free_block_group_tree(&block_group_cache); free_device_cache_tree(&dev_cache); free_device_extent_tree(&dev_extent_cache); - free_extent_record_cache(root->fs_info, &extent_cache); + free_extent_record_cache(&extent_cache); free_root_item_list(&normal_trees); free_root_item_list(&dropping_trees); extent_io_tree_cleanup(&excluded_extents); @@ -9999,13 +10248,18 @@ static int check_tree_block_ref(struct btrfs_root *root, int slot; int skinny_level; int type; - u32 nodesize = root->nodesize; + u32 nodesize = root->fs_info->nodesize; u32 item_size; u64 offset; + int 
tree_reloc_root = 0; int found_ref = 0; int err = 0; int ret; + if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID && + btrfs_header_bytenr(root->node) == bytenr) + tree_reloc_root = 1; + btrfs_init_path(&path); key.objectid = bytenr; if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) @@ -10093,9 +10347,16 @@ static int check_tree_block_ref(struct btrfs_root *root, (offset == root->objectid || offset == owner)) { found_ref = 1; } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) { + /* + * Backref of tree reloc root points to itself, no need + * to check backref any more. + */ + if (tree_reloc_root) + found_ref = 1; + else /* Check if the backref points to valid referencer */ - found_ref = !check_tree_block_ref(root, NULL, offset, - level + 1, owner); + found_ref = !check_tree_block_ref(root, NULL, + offset, level + 1, owner); } if (found_ref) @@ -10146,12 +10407,10 @@ static int check_extent_data_item(struct btrfs_root *root, struct btrfs_extent_inline_ref *iref; struct btrfs_extent_data_ref *dref; u64 owner; - u64 file_extent_gen; u64 disk_bytenr; u64 disk_num_bytes; u64 extent_num_bytes; u64 extent_flags; - u64 extent_gen; u32 item_size; unsigned long end; unsigned long ptr; @@ -10163,7 +10422,6 @@ static int check_extent_data_item(struct btrfs_root *root, btrfs_item_key_to_cpu(eb, &fi_key, slot); fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); - file_extent_gen = btrfs_file_extent_generation(eb, fi); /* Nothing to check for hole and inline data extents */ if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE || @@ -10175,20 +10433,20 @@ static int check_extent_data_item(struct btrfs_root *root, extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi); /* Check unaligned disk_num_bytes and num_bytes */ - if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) { + if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) { error( "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u", fi_key.objectid, 
fi_key.offset, disk_num_bytes, - root->sectorsize); + root->fs_info->sectorsize); err |= BYTES_UNALIGNED; } else { data_bytes_allocated += disk_num_bytes; } - if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) { + if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) { error( "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u", fi_key.objectid, fi_key.offset, extent_num_bytes, - root->sectorsize); + root->fs_info->sectorsize); err |= BYTES_UNALIGNED; } else { data_bytes_referenced += extent_num_bytes; @@ -10202,17 +10460,14 @@ static int check_extent_data_item(struct btrfs_root *root, dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi); ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0); - if (ret) { - err |= BACKREF_MISSING; - goto error; - } + if (ret) + goto out; leaf = path.nodes[0]; slot = path.slots[0]; ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); extent_flags = btrfs_extent_flags(leaf, ei); - extent_gen = btrfs_extent_generation(leaf, ei); if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) { error( @@ -10222,14 +10477,6 @@ static int check_extent_data_item(struct btrfs_root *root, err |= BACKREF_MISMATCH; } - if (file_extent_gen < extent_gen) { - error( -"extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu", - disk_bytenr, disk_num_bytes, file_extent_gen, - extent_gen); - err |= BACKREF_MISMATCH; - } - /* Check data backref inside that extent item */ item_size = btrfs_item_size_nr(leaf, path.slots[0]); iref = (struct btrfs_extent_inline_ref *)(ei + 1); @@ -10255,11 +10502,10 @@ static int check_extent_data_item(struct btrfs_root *root, ptr += btrfs_extent_inline_ref_size(type); } - /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */ if (!found_dbackref) { btrfs_release_path(&path); - btrfs_init_path(&path); + /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */ dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi); dbref_key.type = 
BTRFS_EXTENT_DATA_REF_KEY; dbref_key.offset = hash_extent_data_ref(root->objectid, @@ -10267,13 +10513,32 @@ static int check_extent_data_item(struct btrfs_root *root, ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &dbref_key, &path, 0, 0); - if (!ret) + if (!ret) { + found_dbackref = 1; + goto out; + } + + btrfs_release_path(&path); + + /* + * Neither inlined nor EXTENT_DATA_REF found, try + * SHARED_DATA_REF as last chance. + */ + dbref_key.objectid = disk_bytenr; + dbref_key.type = BTRFS_SHARED_DATA_REF_KEY; + dbref_key.offset = eb->start; + + ret = btrfs_search_slot(NULL, root->fs_info->extent_root, + &dbref_key, &path, 0, 0); + if (!ret) { found_dbackref = 1; + goto out; + } } +out: if (!found_dbackref) err |= BACKREF_MISSING; -error: btrfs_release_path(&path); if (err & BACKREF_MISSING) { error("data extent[%llu %llu] backref lost", @@ -10295,7 +10560,6 @@ static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr) struct btrfs_extent_item *ei; u64 flags; u64 transid; - u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); u8 backref_level; u8 header_level; int ret; @@ -10341,7 +10605,7 @@ static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr) btrfs_release_path(&path); /* Get level from tree block as an alternative source */ - eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid); + eb = read_tree_block(fs_info, bytenr, transid); if (!extent_buffer_uptodate(eb)) { free_extent_buffer(eb); return -EIO; @@ -10394,7 +10658,7 @@ static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id, } /* Read out the tree block to get item/node key */ - eb = read_tree_block(root, bytenr, root->nodesize, 0); + eb = read_tree_block(fs_info, bytenr, 0); if (!extent_buffer_uptodate(eb)) { err |= REFERENCER_MISSING; free_extent_buffer(eb); @@ -10456,6 +10720,34 @@ out: } /* + * Check if tree block @eb is tree reloc root. 
+ * Return 0 if it's not or any problem happens + * Return 1 if it's a tree reloc root + */ +static int is_tree_reloc_root(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb) +{ + struct btrfs_root *tree_reloc_root; + struct btrfs_key key; + u64 bytenr = btrfs_header_bytenr(eb); + u64 owner = btrfs_header_owner(eb); + int ret = 0; + + key.objectid = BTRFS_TREE_RELOC_OBJECTID; + key.offset = owner; + key.type = BTRFS_ROOT_ITEM_KEY; + + tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key); + if (IS_ERR(tree_reloc_root)) + return 0; + + if (bytenr == btrfs_header_bytenr(tree_reloc_root->node)) + ret = 1; + btrfs_free_fs_root(tree_reloc_root); + return ret; +} + +/* * Check referencer for shared block backref * If level == -1, this function will resolve the level. */ @@ -10463,12 +10755,11 @@ static int check_shared_block_backref(struct btrfs_fs_info *fs_info, u64 parent, u64 bytenr, int level) { struct extent_buffer *eb; - u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); u32 nr; int found_parent = 0; int i; - eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0); + eb = read_tree_block(fs_info, parent, 0); if (!extent_buffer_uptodate(eb)) goto out; @@ -10477,6 +10768,13 @@ static int check_shared_block_backref(struct btrfs_fs_info *fs_info, if (level < 0) goto out; + /* It's possible it's a tree reloc root */ + if (parent == bytenr) { + if (is_tree_reloc_root(fs_info, eb)) + found_parent = 1; + goto out; + } + if (level + 1 != btrfs_header_level(eb)) goto out; @@ -10492,7 +10790,7 @@ out: if (!found_parent) { error( "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)", - bytenr, nodesize, parent, level); + bytenr, fs_info->nodesize, parent, level); return REFERENCER_MISSING; } return 0; @@ -10568,6 +10866,8 @@ static int check_extent_data_backref(struct btrfs_fs_info *fs_info, leaf = path.nodes[0]; slot = path.slots[0]; + if (slot >= btrfs_header_nritems(leaf)) + goto next; btrfs_item_key_to_cpu(leaf, &key, slot); if 
(key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY) break; @@ -10583,6 +10883,7 @@ static int check_extent_data_backref(struct btrfs_fs_info *fs_info, offset) found_count++; +next: ret = btrfs_next_item(root, &path); if (ret) break; @@ -10607,12 +10908,11 @@ static int check_shared_data_backref(struct btrfs_fs_info *fs_info, struct extent_buffer *eb; struct btrfs_key key; struct btrfs_file_extent_item *fi; - u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); u32 nr; int found_parent = 0; int i; - eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0); + eb = read_tree_block(fs_info, parent, 0); if (!extent_buffer_uptodate(eb)) goto out; @@ -10710,13 +11010,20 @@ static int check_extent_item(struct btrfs_fs_info *fs_info, } end = (unsigned long)ei + item_size; - if (ptr >= end) { +next: + /* Reached extent item end normally */ + if (ptr == end) + goto out; + + /* Beyond extent item end, wrong item size */ + if (ptr > end) { err |= ITEM_SIZE_MISMATCH; + error("extent item at bytenr %llu slot %d has wrong size", + eb->start, slot); goto out; } /* Now check every backref in this extent item */ -next: iref = (struct btrfs_extent_inline_ref *)ptr; type = btrfs_extent_inline_ref_type(eb, iref); offset = btrfs_extent_inline_ref_offset(eb, iref); @@ -10753,8 +11060,7 @@ next: } ptr += btrfs_extent_inline_ref_size(type); - if (ptr < end) - goto next; + goto next; out: return err; @@ -10794,7 +11100,12 @@ static int check_dev_extent_item(struct btrfs_fs_info *fs_info, l = path.nodes[0]; chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk); - if (btrfs_chunk_length(l, chunk) != length) + ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0], + chunk_key.offset); + if (ret < 0) + goto out; + + if (btrfs_stripe_length(fs_info, l, chunk) != length) goto out; num_stripes = btrfs_chunk_num_stripes(l, chunk); @@ -10855,8 +11166,10 @@ static int check_dev_item(struct btrfs_fs_info *fs_info, /* Iterate dev_extents to calculate the used space of 
a device */ while (1) { - btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) + goto next; + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); if (key.objectid > dev_id) break; if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id) @@ -10953,6 +11266,11 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info, /* Iterate extent tree to account used space */ while (1) { leaf = path.nodes[0]; + + /* Search slot can point to the last item beyond leaf nritems */ + if (path.slots[0] >= btrfs_header_nritems(leaf)) + goto next; + btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]); if (extent_key.objectid >= bg_key.objectid + bg_key.offset) break; @@ -11025,11 +11343,10 @@ static int check_chunk_item(struct btrfs_fs_info *fs_info, struct btrfs_block_group_item *bi; struct btrfs_block_group_item bg_item; struct btrfs_dev_extent *ptr; - u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy); u64 length; u64 chunk_end; + u64 stripe_len; u64 type; - u64 profile; int num_stripes; u64 offset; u64 objectid; @@ -11041,25 +11358,15 @@ static int check_chunk_item(struct btrfs_fs_info *fs_info, chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk); length = btrfs_chunk_length(eb, chunk); chunk_end = chunk_key.offset + length; - if (!IS_ALIGNED(length, sectorsize)) { - error("chunk[%llu %llu) not aligned to %u", - chunk_key.offset, chunk_end, sectorsize); - err |= BYTES_UNALIGNED; + ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot, + chunk_key.offset); + if (ret < 0) { + error("chunk[%llu %llu) is invalid", chunk_key.offset, + chunk_end); + err |= BYTES_UNALIGNED | UNKNOWN_TYPE; goto out; } - type = btrfs_chunk_type(eb, chunk); - profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK; - if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { - error("chunk[%llu %llu) has no chunk type", - chunk_key.offset, chunk_end); - err |= UNKNOWN_TYPE; - } - if (profile && (profile & (profile - 1))) { - 
error("chunk[%llu %llu) multiple profiles detected: %llx", - chunk_key.offset, chunk_end, profile); - err |= UNKNOWN_TYPE; - } bg_key.objectid = chunk_key.offset; bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; @@ -11088,6 +11395,7 @@ static int check_chunk_item(struct btrfs_fs_info *fs_info, } num_stripes = btrfs_chunk_num_stripes(eb, chunk); + stripe_len = btrfs_stripe_length(fs_info, eb, chunk); for (i = 0; i < num_stripes; i++) { btrfs_release_path(&path); btrfs_init_path(&path); @@ -11107,7 +11415,7 @@ static int check_chunk_item(struct btrfs_fs_info *fs_info, offset = btrfs_dev_extent_chunk_offset(leaf, ptr); if (objectid != chunk_key.objectid || offset != chunk_key.offset || - btrfs_dev_extent_length(leaf, ptr) != length) + btrfs_dev_extent_length(leaf, ptr) != stripe_len) goto not_match_dev; continue; not_match_dev: @@ -11323,11 +11631,6 @@ static int traverse_tree_block(struct btrfs_root *root, total_fs_tree_bytes += node->len; if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID) total_extent_tree_bytes += node->len; - if (!found_old_backref && - btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID && - btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV && - !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC)) - found_old_backref = 1; /* pre-order tranversal, check itself first */ level = btrfs_header_level(node); @@ -11366,7 +11669,7 @@ static int traverse_tree_block(struct btrfs_root *root, * As a btrfs tree has most 8 levels (0..7), so it's quite safe * to call the function itself. */ - eb = read_tree_block(root, blocknr, root->nodesize, 0); + eb = read_tree_block(root->fs_info, blocknr, 0); if (extent_buffer_uptodate(eb)) { ret = traverse_tree_block(root, eb); err |= ret; @@ -11380,15 +11683,18 @@ static int traverse_tree_block(struct btrfs_root *root, /* * Low memory usage version check_chunks_and_extents. 
*/ -static int check_chunks_and_extents_v2(struct btrfs_root *root) +static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info) { struct btrfs_path path; struct btrfs_key key; struct btrfs_root *root1; + struct btrfs_root *root; struct btrfs_root *cur_root; int err = 0; int ret; + root = fs_info->fs_root; + root1 = root->fs_info->chunk_root; ret = traverse_tree_block(root1, root1->node); err |= ret; @@ -11414,7 +11720,11 @@ static int check_chunks_and_extents_v2(struct btrfs_root *root) goto next; key.offset = (u64)-1; - cur_root = btrfs_read_fs_root(root->fs_info, &key); + if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) + cur_root = btrfs_read_fs_root_no_cache(root->fs_info, + &key); + else + cur_root = btrfs_read_fs_root(root->fs_info, &key); if (IS_ERR(cur_root) || !cur_root) { error("failed to read tree: %lld", key.objectid); goto next; @@ -11423,6 +11733,8 @@ static int check_chunks_and_extents_v2(struct btrfs_root *root) ret = traverse_tree_block(cur_root, cur_root->node); err |= ret; + if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) + btrfs_free_fs_root(cur_root); next: ret = btrfs_next_item(root1, &path); if (ret) @@ -11434,6 +11746,20 @@ out: return err; } +static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info) +{ + int ret; + + if (!ctx.progress_enabled) + fprintf(stderr, "checking extents\n"); + if (check_mode == CHECK_MODE_LOWMEM) + ret = check_chunks_and_extents_v2(fs_info); + else + ret = check_chunks_and_extents(fs_info); + + return ret; +} + static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, int overwrite) { @@ -11451,7 +11777,7 @@ static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans, goto init; } c = btrfs_alloc_free_block(trans, root, - root->nodesize, + root->fs_info->nodesize, root->root_key.objectid, &disk_key, level, 0, 0); if (IS_ERR(c)) { @@ -11508,7 +11834,6 @@ static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info, struct btrfs_root_item *ri; struct 
btrfs_key key; u64 bytenr; - u32 nodesize; int level = btrfs_header_level(eb); int nritems; int ret; @@ -11525,7 +11850,6 @@ static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info, btrfs_pin_extent(fs_info, eb->start, eb->len); - nodesize = btrfs_super_nodesize(fs_info->super_copy); nritems = btrfs_header_nritems(eb); for (i = 0; i < nritems; i++) { if (level == 0) { @@ -11546,8 +11870,7 @@ static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info, * in, but for now this doesn't actually use the root so * just pass in extent_root. */ - tmp = read_tree_block(fs_info->extent_root, bytenr, - nodesize, 0); + tmp = read_tree_block(fs_info, bytenr, 0); if (!extent_buffer_uptodate(tmp)) { fprintf(stderr, "Error reading root block\n"); return -EIO; @@ -11561,12 +11884,12 @@ static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info, /* If we aren't the tree root don't read the block */ if (level == 1 && !tree_root) { - btrfs_pin_extent(fs_info, bytenr, nodesize); + btrfs_pin_extent(fs_info, bytenr, + fs_info->nodesize); continue; } - tmp = read_tree_block(fs_info->extent_root, bytenr, - nodesize, 0); + tmp = read_tree_block(fs_info, bytenr, 0); if (!extent_buffer_uptodate(tmp)) { fprintf(stderr, "Error reading tree block\n"); return -EIO; @@ -11647,8 +11970,7 @@ static int reset_block_groups(struct btrfs_fs_info *fs_info) key.objectid, key.offset, btrfs_chunk_length(leaf, chunk)); set_extent_dirty(&fs_info->free_space_cache, key.offset, - key.offset + btrfs_chunk_length(leaf, chunk), - GFP_NOFS); + key.offset + btrfs_chunk_length(leaf, chunk)); path.slots[0]++; } start = 0; @@ -11945,13 +12267,14 @@ static int populate_csum(struct btrfs_trans_handle *trans, struct btrfs_root *csum_root, char *buf, u64 start, u64 len) { + struct btrfs_fs_info *fs_info = csum_root->fs_info; u64 offset = 0; u64 sectorsize; int ret = 0; while (offset < len) { - sectorsize = csum_root->sectorsize; - ret = read_extent_data(csum_root, buf, start + offset, + sectorsize = 
fs_info->sectorsize; + ret = read_extent_data(fs_info, buf, start + offset, &sectorsize, 0); if (ret) break; @@ -11978,7 +12301,7 @@ static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans, int slot = 0; int ret = 0; - buf = malloc(cur_root->fs_info->csum_root->sectorsize); + buf = malloc(cur_root->fs_info->sectorsize); if (!buf) return -ENOMEM; @@ -12110,7 +12433,7 @@ static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans, return ret; } - buf = malloc(csum_root->sectorsize); + buf = malloc(csum_root->fs_info->sectorsize); if (!buf) { btrfs_release_path(&path); return -ENOMEM; @@ -12311,8 +12634,7 @@ out: return ret; } -static int maybe_repair_root_item(struct btrfs_fs_info *info, - struct btrfs_path *path, +static int maybe_repair_root_item(struct btrfs_path *path, const struct btrfs_key *root_key, const int read_only_mode) { @@ -12467,8 +12789,7 @@ again: if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID) goto next; - ret = maybe_repair_root_item(info, &path, &found_key, - trans ? 0 : 1); + ret = maybe_repair_root_item(&path, &found_key, trans ? 
0 : 1); if (ret < 0) goto out; if (ret) { @@ -12525,6 +12846,45 @@ static int clear_free_space_cache(struct btrfs_fs_info *fs_info) return ret; } +static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info, + int clear_version) +{ + int ret = 0; + + if (clear_version == 1) { + if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { + error( + "free space cache v2 detected, use --clear-space-cache v2"); + ret = 1; + goto close_out; + } + printf("Clearing free space cache\n"); + ret = clear_free_space_cache(fs_info); + if (ret) { + error("failed to clear free space cache"); + ret = 1; + } else { + printf("Free space cache cleared\n"); + } + } else if (clear_version == 2) { + if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { + printf("no free space cache v2 to clear\n"); + ret = 0; + goto close_out; + } + printf("Clear free space cache v2\n"); + ret = btrfs_clear_free_space_tree(fs_info); + if (ret) { + error("failed to clear free space cache v2: %d", ret); + ret = 1; + } else { + printf("free space cache v2 cleared\n"); + } + } +close_out: + return ret; +} + const char * const cmd_check_usage[] = { "btrfs check [options] <device>", "Check structural integrity of a filesystem (unmounted).", @@ -12535,6 +12895,7 @@ const char * const cmd_check_usage[] = { "", "-s|--super <superblock> use this superblock copy", "-b|--backup use the first valid backup root copy", + "--force skip mount checks, repair is not possible", "--repair try to repair the filesystem", "--readonly run in read-only mode (default)", "--init-csum-tree create a new CRC tree", @@ -12566,7 +12927,7 @@ int cmd_check(int argc, char **argv) u64 tree_root_bytenr = 0; u64 chunk_root_bytenr = 0; char uuidbuf[BTRFS_UUID_UNPARSED_SIZE]; - int ret; + int ret = 0; int err = 0; u64 num; int init_csum_tree = 0; @@ -12575,13 +12936,15 @@ int cmd_check(int argc, char **argv) int qgroup_report = 0; int qgroups_repaired = 0; unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE; + int force = 0; while(1) { int c; enum { GETOPT_VAL_REPAIR = 257, 
GETOPT_VAL_INIT_CSUM, GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM, GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE, - GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE }; + GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE, + GETOPT_VAL_FORCE }; static const struct option long_options[] = { { "super", required_argument, NULL, 's' }, { "repair", no_argument, NULL, GETOPT_VAL_REPAIR }, @@ -12603,10 +12966,11 @@ int cmd_check(int argc, char **argv) GETOPT_VAL_MODE }, { "clear-space-cache", required_argument, NULL, GETOPT_VAL_CLEAR_SPACE_CACHE}, + { "force", no_argument, NULL, GETOPT_VAL_FORCE }, { NULL, 0, NULL, 0} }; - c = getopt_long(argc, argv, "as:br:p", long_options, NULL); + c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL); if (c < 0) break; switch(c) { @@ -12687,6 +13051,9 @@ int cmd_check(int argc, char **argv) } ctree_flags |= OPEN_CTREE_WRITES; break; + case GETOPT_VAL_FORCE: + force = 1; + break; } } @@ -12705,25 +13072,46 @@ int cmd_check(int argc, char **argv) } /* - * Not supported yet + * experimental and dangerous */ - if (repair && check_mode == CHECK_MODE_LOWMEM) { - error("low memory mode doesn't support repair yet"); - exit(1); - } + if (repair && check_mode == CHECK_MODE_LOWMEM) + warning("low-memory mode repair support is only partial"); radix_tree_init(); cache_tree_init(&root_cache); - if((ret = check_mounted(argv[optind])) < 0) { - error("could not check mount status: %s", strerror(-ret)); - err |= !!ret; - goto err_out; - } else if(ret) { - error("%s is currently mounted, aborting", argv[optind]); - ret = -EBUSY; - err |= !!ret; - goto err_out; + ret = check_mounted(argv[optind]); + if (!force) { + if (ret < 0) { + error("could not check mount status: %s", + strerror(-ret)); + err |= !!ret; + goto err_out; + } else if (ret) { + error( +"%s is currently mounted, use --force if you really intend to check the filesystem", + argv[optind]); + ret = -EBUSY; + err |= !!ret; + goto err_out; + } + } else { + if (repair) { + error("repair and --force 
is not yet supported"); + ret = 1; + err |= !!ret; + goto err_out; + } + if (ret < 0) { + warning( +"cannot check mount status of %s, the filesystem could be mounted, continuing because of --force", + argv[optind]); + } else if (ret) { + warning( + "filesystem mounted, continuing because of --force"); + } + /* A block device is mounted in exclusive mode by kernel */ + ctree_flags &= ~OPEN_CTREE_EXCLUSIVE; } /* only allow partial opening under repair mode */ @@ -12741,36 +13129,26 @@ int cmd_check(int argc, char **argv) global_info = info; root = info->fs_root; - if (clear_space_cache == 1) { - if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) { - error( - "free space cache v2 detected, use --clear-space-cache v2"); - ret = 1; - goto close_out; - } - printf("Clearing free space cache\n"); - ret = clear_free_space_cache(info); - if (ret) { - error("failed to clear free space cache"); - ret = 1; - } else { - printf("Free space cache cleared\n"); - } + uuid_unparse(info->super_copy->fsid, uuidbuf); + + printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf); + + /* + * Check the bare minimum before starting anything else that could rely + * on it, namely the tree roots, any local consistency checks + */ + if (!extent_buffer_uptodate(info->tree_root->node) || + !extent_buffer_uptodate(info->dev_root->node) || + !extent_buffer_uptodate(info->chunk_root->node)) { + error("critical roots corrupted, unable to check the filesystem"); + err |= !!ret; + ret = -EIO; goto close_out; - } else if (clear_space_cache == 2) { - if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) { - printf("no free space cache v2 to clear\n"); - ret = 0; - goto close_out; - } - printf("Clear free space cache v2\n"); - ret = btrfs_clear_free_space_tree(info); - if (ret) { - error("failed to clear free space cache v2: %d", ret); - ret = 1; - } else { - printf("free space cache v2 cleared\n"); - } + } + + if (clear_space_cache) { + ret = do_clear_free_space_cache(info, clear_space_cache); + err 
|= !!ret; goto close_out; } @@ -12793,7 +13171,6 @@ int cmd_check(int argc, char **argv) } } - uuid_unparse(info->super_copy->fsid, uuidbuf); if (qgroup_report) { printf("Print quota groups for %s\nUUID: %s\n", argv[optind], uuidbuf); @@ -12810,16 +13187,6 @@ int cmd_check(int argc, char **argv) err |= !!ret; goto close_out; } - printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf); - - if (!extent_buffer_uptodate(info->tree_root->node) || - !extent_buffer_uptodate(info->dev_root->node) || - !extent_buffer_uptodate(info->chunk_root->node)) { - error("critical roots corrupted, unable to check the filesystem"); - err |= !!ret; - ret = -EIO; - goto close_out; - } if (init_extent_tree || init_csum_tree) { struct btrfs_trans_handle *trans; @@ -12881,12 +13248,7 @@ int cmd_check(int argc, char **argv) goto close_out; } - if (!ctx.progress_enabled) - fprintf(stderr, "checking extents\n"); - if (check_mode == CHECK_MODE_LOWMEM) - ret = check_chunks_and_extents_v2(root); - else - ret = check_chunks_and_extents(root); + ret = do_check_chunks_and_extents(info); err |= !!ret; if (ret) error( @@ -12894,8 +13256,10 @@ int cmd_check(int argc, char **argv) ret = repair_root_items(info); err |= !!ret; - if (ret < 0) + if (ret < 0) { + error("failed to repair root items: %s", strerror(-ret)); goto close_out; + } if (repair) { fprintf(stderr, "Fixed %d roots.\n", ret); ret = 0; @@ -12918,8 +13282,13 @@ int cmd_check(int argc, char **argv) } ret = check_space_cache(root); err |= !!ret; - if (ret) + if (ret) { + if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) + error("errors found in free space tree"); + else + error("errors found in free space cache"); goto out; + } /* * We used to have to have these hole extents in between our real @@ -12928,29 +13297,30 @@ int cmd_check(int argc, char **argv) * ignore it when this happens. 
*/ no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); - if (!ctx.progress_enabled) - fprintf(stderr, "checking fs roots\n"); - if (check_mode == CHECK_MODE_LOWMEM) - ret = check_fs_roots_v2(root->fs_info); - else - ret = check_fs_roots(root, &root_cache); + ret = do_check_fs_roots(info, &root_cache); err |= !!ret; - if (ret) + if (ret) { + error("errors found in fs roots"); goto out; + } fprintf(stderr, "checking csums\n"); ret = check_csums(root); err |= !!ret; - if (ret) + if (ret) { + error("errors found in csum tree"); goto out; + } fprintf(stderr, "checking root refs\n"); /* For low memory mode, check_fs_roots_v2 handles root refs */ if (check_mode != CHECK_MODE_LOWMEM) { ret = check_root_refs(root, &root_cache); err |= !!ret; - if (ret) + if (ret) { + error("errors found in root refs"); goto out; + } } while (repair && !list_empty(&root->fs_info->recow_ebs)) { @@ -12961,8 +13331,10 @@ int cmd_check(int argc, char **argv) list_del_init(&eb->recow); ret = recow_extent_buffer(root, eb); err |= !!ret; - if (ret) + if (ret) { + error("fails to fix transid errors"); break; + } } while (!list_empty(&delete_items)) { @@ -12981,13 +13353,17 @@ int cmd_check(int argc, char **argv) fprintf(stderr, "checking quota groups\n"); ret = qgroup_verify_all(info); err |= !!ret; - if (ret) + if (ret) { + error("failed to check quota groups"); goto out; + } report_qgroups(0); ret = repair_qgroups(info, &qgroups_repaired); err |= !!ret; - if (err) + if (err) { + error("failed to repair quota groups"); goto out; + } ret = 0; } @@ -12997,19 +13373,12 @@ int cmd_check(int argc, char **argv) err |= !!ret; } out: - if (found_old_backref) { /* - * there was a disk format change when mixed - * backref was in testing tree. The old format - * existed about one week. - */ - printf("\n * Found old mixed backref format. " - "The old format is not supported! *" - "\n * Please mount the FS in readonly mode, " - "backup data and re-format the FS. 
*\n\n"); - err |= 1; - } - printf("found %llu bytes used err is %d\n", - (unsigned long long)bytes_used, ret); + printf("found %llu bytes used, ", + (unsigned long long)bytes_used); + if (err) + printf("error(s) found\n"); + else + printf("no error found\n"); printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes); printf("total tree bytes: %llu\n", (unsigned long long)total_btree_bytes);