X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=cmds-check.c;h=e28ad2f728e7fe92e769c665d3443a0a442ad444;hb=441d8aea8ff939dcd4e37286831d42c91610b5eb;hp=32fc20c4cc69c66c1458db93b2e112a7687ea964;hpb=d07873c007b518424915945c6a2331481ec587cd;p=platform%2Fupstream%2Fbtrfs-progs.git diff --git a/cmds-check.c b/cmds-check.c index 32fc20c..e28ad2f 100644 --- a/cmds-check.c +++ b/cmds-check.c @@ -74,8 +74,17 @@ static struct btrfs_fs_info *global_info; static struct task_ctx ctx = { 0 }; static struct cache_tree *roots_info_cache = NULL; +enum btrfs_check_mode { + CHECK_MODE_ORIGINAL, + CHECK_MODE_LOWMEM, + CHECK_MODE_UNKNOWN, + CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL +}; + +static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT; + struct extent_backref { - struct rb_node node; + struct list_head list; unsigned int is_data:1; unsigned int found_extent_tree:1; unsigned int full_backref:1; @@ -83,9 +92,9 @@ struct extent_backref { unsigned int broken:1; }; -static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node) +static inline struct extent_backref* to_extent_backref(struct list_head *entry) { - return rb_entry(node, struct extent_backref, node); + return list_entry(entry, struct extent_backref, list); } struct data_backref { @@ -108,56 +117,6 @@ static inline struct data_backref* to_data_backref(struct extent_backref *back) return container_of(back, struct data_backref, node); } -static int compare_data_backref(struct rb_node *node1, struct rb_node *node2) -{ - struct extent_backref *ext1 = rb_node_to_extent_backref(node1); - struct extent_backref *ext2 = rb_node_to_extent_backref(node2); - struct data_backref *back1 = to_data_backref(ext1); - struct data_backref *back2 = to_data_backref(ext2); - - WARN_ON(!ext1->is_data); - WARN_ON(!ext2->is_data); - - /* parent and root are a union, so this covers both */ - if (back1->parent > back2->parent) - return 1; - if (back1->parent < back2->parent) - return -1; - - /* This is a full backref and the parents match. */ - if (back1->node.full_backref) - return 0; - - if (back1->owner > back2->owner) - return 1; - if (back1->owner < back2->owner) - return -1; - - if (back1->offset > back2->offset) - return 1; - if (back1->offset < back2->offset) - return -1; - - if (back1->bytes > back2->bytes) - return 1; - if (back1->bytes < back2->bytes) - return -1; - - if (back1->found_ref && back2->found_ref) { - if (back1->disk_bytenr > back2->disk_bytenr) - return 1; - if (back1->disk_bytenr < back2->disk_bytenr) - return -1; - - if (back1->found_ref > back2->found_ref) - return 1; - if (back1->found_ref < back2->found_ref) - return -1; - } - - return 0; -} - /* * Much like data_backref, just removed the undetermined members * and change it to use list_head. @@ -186,54 +145,12 @@ static inline struct tree_backref* to_tree_backref(struct extent_backref *back) return container_of(back, struct tree_backref, node); } -static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2) -{ - struct extent_backref *ext1 = rb_node_to_extent_backref(node1); - struct extent_backref *ext2 = rb_node_to_extent_backref(node2); - struct tree_backref *back1 = to_tree_backref(ext1); - struct tree_backref *back2 = to_tree_backref(ext2); - - WARN_ON(ext1->is_data); - WARN_ON(ext2->is_data); - - /* parent and root are a union, so this covers both */ - if (back1->parent > back2->parent) - return 1; - if (back1->parent < back2->parent) - return -1; - - return 0; -} - -static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2) -{ - struct extent_backref *ext1 = rb_node_to_extent_backref(node1); - struct extent_backref *ext2 = rb_node_to_extent_backref(node2); - - if (ext1->is_data > ext2->is_data) - return 1; - - if (ext1->is_data < ext2->is_data) - return -1; - - if (ext1->full_backref > ext2->full_backref) - return 1; - if (ext1->full_backref < ext2->full_backref) - return -1; - - if (ext1->is_data) - return compare_data_backref(node1, node2); - else - return compare_tree_backref(node1, node2); -} - /* Explicit initialization for extent_record::flag_block_full_backref */ enum { FLAG_UNSET = 2 }; struct extent_record { struct list_head backrefs; struct list_head dups; - struct rb_root backref_tree; struct list_head list; struct cache_extent cache; struct btrfs_disk_key parent_key; @@ -268,9 +185,9 @@ struct inode_backref { unsigned int found_dir_item:1; unsigned int found_dir_index:1; unsigned int found_inode_ref:1; - unsigned int filetype:8; + u8 filetype; + u8 ref_type; int errors; - unsigned int ref_type; u64 dir; u64 index; u16 namelen; @@ -445,6 +362,11 @@ struct root_item_info { #define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */ #define REFERENCER_MISSING (1 << 3) /* Referencer not found */ #define REFERENCER_MISMATCH (1 << 4) /* Referenceer found but does not match */ +#define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */ +#define ITEM_SIZE_MISMATCH (1 << 5) /* Bad item size */ +#define UNKNOWN_TYPE (1 << 6) /* Unknown type */ +#define ACCOUNTING_MISMATCH (1 << 7) /* Used space accounting error */ +#define CHUNK_TYPE_MISMATCH (1 << 8) static void *print_status_check(void *p) { @@ -480,6 +402,18 @@ static int print_status_return(void *p) return 0; } +static enum btrfs_check_mode parse_check_mode(const char *str) +{ + if (strcmp(str, "lowmem") == 0) + return CHECK_MODE_LOWMEM; + if (strcmp(str, "orig") == 0) + return CHECK_MODE_ORIGINAL; + if (strcmp(str, "original") == 0) + return CHECK_MODE_ORIGINAL; + + return CHECK_MODE_UNKNOWN; +} + /* Compatible function to allow reuse of old codes */ static u64 first_extent_gap(struct rb_root *holes) { @@ -725,6 +659,7 @@ static struct inode_record *clone_inode_rec(struct inode_record *orig_rec) struct inode_backref *tmp; struct orphan_data_extent *src_orphan; struct orphan_data_extent *dst_orphan; + struct rb_node *rb; size_t size; int ret; @@ -757,10 +692,21 @@ static struct inode_record *clone_inode_rec(struct inode_record *orig_rec) list_add_tail(&dst_orphan->list, &rec->orphan_extents); } ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes); - BUG_ON(ret < 0); + if (ret < 0) + goto cleanup_rb; return rec; +cleanup_rb: + rb = rb_first(&rec->holes); + while (rb) { + struct file_extent_hole *hole; + + hole = rb_entry(rb, struct file_extent_hole, node); + rb = rb_next(rb); + free(hole); + } + cleanup: if (!list_empty(&rec->backrefs)) list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) { @@ -990,7 +936,7 @@ static void maybe_free_inode_rec(struct cache_tree *inode_cache, struct cache_extent *cache; struct inode_backref *tmp, *backref; struct ptr_node *node; - unsigned char filetype; + u8 filetype; if (!rec->found_inode_item) return; @@ -1121,7 +1067,7 @@ static struct inode_backref *get_inode_backref(struct inode_record *rec, static int add_inode_backref(struct cache_tree *inode_cache, u64 ino, u64 dir, u64 index, const char *name, int namelen, - int filetype, int itemtype, int errors) + u8 filetype, u8 itemtype, int errors) { struct inode_record *rec; struct inode_backref *backref; @@ -1524,7 +1470,7 @@ static int process_dir_item(struct btrfs_root *root, u32 data_len; int error; int nritems = 0; - int filetype; + u8 filetype; struct btrfs_dir_item *di; struct inode_record *rec; struct cache_tree *root_cache; @@ -2240,7 +2186,7 @@ static int add_missing_dir_index(struct btrfs_root *root, struct inode_record *rec, struct inode_backref *backref) { - struct btrfs_path *path; + struct btrfs_path path; struct btrfs_trans_handle *trans; struct btrfs_dir_item *dir_item; struct extent_buffer *leaf; @@ -2251,27 +2197,22 @@ static int add_missing_dir_index(struct btrfs_root *root, u32 data_size = sizeof(*dir_item) + backref->namelen; int ret; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - btrfs_free_path(path); + if (IS_ERR(trans)) return PTR_ERR(trans); - } fprintf(stderr, "repairing missing dir index item for inode %llu\n", (unsigned long long)rec->ino); + + btrfs_init_path(&path); key.objectid = backref->dir; key.type = BTRFS_DIR_INDEX_KEY; key.offset = backref->index; - - ret = btrfs_insert_empty_item(trans, root, path, &key, data_size); + ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size); BUG_ON(ret); - leaf = path->nodes[0]; - dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item); + leaf = path.nodes[0]; + dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item); disk_key.objectid = cpu_to_le64(rec->ino); disk_key.type = BTRFS_INODE_ITEM_KEY; @@ -2284,7 +2225,7 @@ static int add_missing_dir_index(struct btrfs_root *root, name_ptr = (unsigned long)(dir_item + 1); write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen); btrfs_mark_buffer_dirty(leaf); - btrfs_free_path(path); + btrfs_release_path(&path); btrfs_commit_transaction(trans, root); backref->found_dir_index = 1; @@ -2309,31 +2250,25 @@ static int delete_dir_index(struct btrfs_root *root, { struct btrfs_trans_handle *trans; struct btrfs_dir_item *di; - struct btrfs_path *path; + struct btrfs_path path; int ret = 0; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - btrfs_free_path(path); + if (IS_ERR(trans)) return PTR_ERR(trans); - } - fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n", (unsigned long long)backref->dir, BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index, (unsigned long long)root->objectid); - di = btrfs_lookup_dir_index(trans, root, path, backref->dir, + btrfs_init_path(&path); + di = btrfs_lookup_dir_index(trans, root, &path, backref->dir, backref->name, backref->namelen, backref->index, -1); if (IS_ERR(di)) { ret = PTR_ERR(di); - btrfs_free_path(path); + btrfs_release_path(&path); btrfs_commit_transaction(trans, root); if (ret == -ENOENT) return 0; @@ -2341,11 +2276,11 @@ static int delete_dir_index(struct btrfs_root *root, } if (!di) - ret = btrfs_del_item(trans, root, path); + ret = btrfs_del_item(trans, root, &path); else - ret = btrfs_delete_one_dir_name(trans, root, path, di); + ret = btrfs_delete_one_dir_name(trans, root, &path, di); BUG_ON(ret); - btrfs_free_path(path); + btrfs_release_path(&path); btrfs_commit_transaction(trans, root); return ret; } @@ -2748,48 +2683,46 @@ out: */ static int find_normal_file_extent(struct btrfs_root *root, u64 ino) { - struct btrfs_path *path; + struct btrfs_path path; struct btrfs_key key; struct btrfs_key found_key; struct btrfs_file_extent_item *fi; u8 type; int ret = 0; - path = btrfs_alloc_path(); - if (!path) - goto out; + btrfs_init_path(&path); key.objectid = ino; key.type = BTRFS_EXTENT_DATA_KEY; key.offset = 0; - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); if (ret < 0) { ret = 0; goto out; } - if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { - ret = btrfs_next_leaf(root, path); + if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) { + ret = btrfs_next_leaf(root, &path); if (ret) { ret = 0; goto out; } } while (1) { - btrfs_item_key_to_cpu(path->nodes[0], &found_key, - path->slots[0]); + btrfs_item_key_to_cpu(path.nodes[0], &found_key, + path.slots[0]); if (found_key.objectid != ino || found_key.type != BTRFS_EXTENT_DATA_KEY) break; - fi = btrfs_item_ptr(path->nodes[0], path->slots[0], + fi = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_file_extent_item); - type = btrfs_file_extent_type(path->nodes[0], fi); + type = btrfs_file_extent_type(path.nodes[0], fi); if (type != BTRFS_FILE_EXTENT_INLINE) { ret = 1; goto out; } } out: - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } @@ -2980,7 +2913,7 @@ out: static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec) { struct btrfs_trans_handle *trans; - struct btrfs_path *path; + struct btrfs_path path; int ret = 0; if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG | @@ -2992,10 +2925,6 @@ static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec) I_ERR_FILE_NBYTES_WRONG))) return rec->errors; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - /* * For nlink repair, it may create a dir and add link, so * 2 for parent(256)'s dir_index and dir_item @@ -3004,27 +2933,26 @@ static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec) * 2 for lost+found dir's dir_index and dir_item for the file */ trans = btrfs_start_transaction(root, 7); - if (IS_ERR(trans)) { - btrfs_free_path(path); + if (IS_ERR(trans)) return PTR_ERR(trans); - } + btrfs_init_path(&path); if (rec->errors & I_ERR_NO_INODE_ITEM) - ret = repair_inode_no_item(trans, root, path, rec); + ret = repair_inode_no_item(trans, root, &path, rec); if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN) - ret = repair_inode_orphan_extent(trans, root, path, rec); + ret = repair_inode_orphan_extent(trans, root, &path, rec); if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT) - ret = repair_inode_discount_extent(trans, root, path, rec); + ret = repair_inode_discount_extent(trans, root, &path, rec); if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG) - ret = repair_inode_isize(trans, root, path, rec); + ret = repair_inode_isize(trans, root, &path, rec); if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM) - ret = repair_inode_orphan_item(trans, root, path, rec); + ret = repair_inode_orphan_item(trans, root, &path, rec); if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG) - ret = repair_inode_nlinks(trans, root, path, rec); + ret = repair_inode_nlinks(trans, root, &path, rec); if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG) - ret = repair_inode_nbytes(trans, root, path, rec); + ret = repair_inode_nbytes(trans, root, &path, rec); btrfs_commit_transaction(trans, root); - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } @@ -3276,7 +3204,7 @@ static void free_root_record(struct cache_extent *cache) free(backref); } - kfree(rec); + free(rec); } FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record); @@ -3557,7 +3485,7 @@ static int repair_btree(struct btrfs_root *root, struct cache_tree *corrupt_blocks) { struct btrfs_trans_handle *trans; - struct btrfs_path *path; + struct btrfs_path path; struct btrfs_corrupt_block *corrupt; struct cache_extent *cache; struct btrfs_key key; @@ -3568,23 +3496,20 @@ static int repair_btree(struct btrfs_root *root, if (cache_tree_empty(corrupt_blocks)) return 0; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { ret = PTR_ERR(trans); fprintf(stderr, "Error starting transaction: %s\n", strerror(-ret)); - goto out_free_path; + return ret; } + btrfs_init_path(&path); cache = first_cache_extent(corrupt_blocks); while (cache) { corrupt = container_of(cache, struct btrfs_corrupt_block, cache); level = corrupt->level; - path->lowest_level = level; + path.lowest_level = level; key.objectid = corrupt->key.objectid; key.type = corrupt->key.type; key.offset = corrupt->key.offset; @@ -3595,22 +3520,22 @@ static int repair_btree(struct btrfs_root *root, * so ins_len set to 0 here. * Balance will be done after all corrupt node/leaf is deleted. */ - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + ret = btrfs_search_slot(trans, root, &key, &path, 0, 1); if (ret < 0) goto out; - offset = btrfs_node_blockptr(path->nodes[level], - path->slots[level]); + offset = btrfs_node_blockptr(path.nodes[level], + path.slots[level]); /* Remove the ptr */ - ret = btrfs_del_ptr(trans, root, path, level, - path->slots[level]); + ret = btrfs_del_ptr(trans, root, &path, level, + path.slots[level]); if (ret < 0) goto out; /* * Remove the corresponding extent * return value is not concerned. */ - btrfs_release_path(path); + btrfs_release_path(&path); ret = btrfs_free_extent(trans, root, offset, root->nodesize, 0, root->root_key.objectid, level - 1, 0); @@ -3623,18 +3548,17 @@ static int repair_btree(struct btrfs_root *root, corrupt = container_of(cache, struct btrfs_corrupt_block, cache); memcpy(&key, &corrupt->key, sizeof(key)); - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); if (ret < 0) goto out; /* return will always >0 since it won't find the item */ ret = 0; - btrfs_release_path(path); + btrfs_release_path(&path); cache = next_cache_extent(cache); } out: btrfs_commit_transaction(trans, root); -out_free_path: - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } @@ -3716,6 +3640,11 @@ static int check_fs_root(struct btrfs_root *root, btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); level = root_item->drop_level; path.lowest_level = level; + if (level > btrfs_header_level(root->node) || + level >= BTRFS_MAX_LEVEL) { + error("ignoring invalid drop level: %u", level); + goto skip_walking; + } wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); if (wret < 0) goto skip_walking; @@ -3899,15 +3828,16 @@ out: static int all_backpointers_checked(struct extent_record *rec, int print_errs) { - struct rb_node *n; + struct list_head *cur = rec->backrefs.next; struct extent_backref *back; struct tree_backref *tback; struct data_backref *dback; u64 found = 0; int err = 0; - for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) { - back = rb_node_to_extent_backref(n); + while(cur != &rec->backrefs) { + back = to_extent_backref(cur); + cur = cur->next; if (!back->found_extent_tree) { err = 1; if (!print_errs) @@ -4010,16 +3940,17 @@ out: return err; } -static void __free_one_backref(struct rb_node *node) -{ - struct extent_backref *back = rb_node_to_extent_backref(node); - - free(back); -} - -static void free_all_extent_backrefs(struct extent_record *rec) +static int free_all_extent_backrefs(struct extent_record *rec) { - rb_free_nodes(&rec->backref_tree, __free_one_backref); + struct extent_backref *back; + struct list_head *cur; + while (!list_empty(&rec->backrefs)) { + cur = rec->backrefs.next; + back = to_extent_backref(cur); + list_del(cur); + free(back); + } + return 0; } static void free_extent_record_cache(struct btrfs_fs_info *fs_info, @@ -4059,7 +3990,7 @@ static int check_owner_ref(struct btrfs_root *root, struct extent_record *rec, struct extent_buffer *buf) { - struct extent_backref *node, *tmp; + struct extent_backref *node; struct tree_backref *back; struct btrfs_root *ref_root; struct btrfs_key key; @@ -4069,8 +4000,7 @@ static int check_owner_ref(struct btrfs_root *root, int found = 0; int ret; - rbtree_postorder_for_each_entry_safe(node, tmp, - &rec->backref_tree, node) { + list_for_each_entry(node, &rec->backrefs, list) { if (node->is_data) continue; if (!node->found_ref) @@ -4115,16 +4045,18 @@ static int check_owner_ref(struct btrfs_root *root, static int is_extent_tree_record(struct extent_record *rec) { - struct extent_backref *ref, *tmp; + struct list_head *cur = rec->backrefs.next; + struct extent_backref *node; struct tree_backref *back; int is_extent = 0; - rbtree_postorder_for_each_entry_safe(ref, tmp, - &rec->backref_tree, node) { - if (ref->is_data) + while(cur != &rec->backrefs) { + node = to_extent_backref(cur); + cur = cur->next; + if (node->is_data) return 0; - back = to_tree_backref(ref); - if (ref->full_backref) + back = to_tree_backref(node); + if (node->full_backref) return 0; if (back->root == BTRFS_EXTENT_TREE_OBJECTID) is_extent = 1; @@ -4368,7 +4300,7 @@ static int try_to_fix_bad_block(struct btrfs_root *root, struct ulist *roots; struct ulist_node *node; struct btrfs_root *search_root; - struct btrfs_path *path; + struct btrfs_path path; struct ulist_iterator iter; struct btrfs_key root_key, key; int ret; @@ -4377,17 +4309,11 @@ static int try_to_fix_bad_block(struct btrfs_root *root, status != BTRFS_TREE_BLOCK_INVALID_OFFSETS) return -EIO; - path = btrfs_alloc_path(); - if (!path) - return -EIO; - - ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, - 0, &roots); - if (ret) { - btrfs_free_path(path); + ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots); + if (ret) return -EIO; - } + btrfs_init_path(&path); ULIST_ITER_INIT(&iter); while ((node = ulist_next(roots, &iter))) { root_key.objectid = node->val; @@ -4407,31 +4333,31 @@ static int try_to_fix_bad_block(struct btrfs_root *root, break; } - path->lowest_level = btrfs_header_level(buf); - path->skip_check_block = 1; - if (path->lowest_level) + path.lowest_level = btrfs_header_level(buf); + path.skip_check_block = 1; + if (path.lowest_level) btrfs_node_key_to_cpu(buf, &key, 0); else btrfs_item_key_to_cpu(buf, &key, 0); - ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1); + ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1); if (ret) { ret = -EIO; btrfs_commit_transaction(trans, search_root); break; } if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER) - ret = fix_key_order(trans, search_root, path); + ret = fix_key_order(trans, search_root, &path); else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS) - ret = fix_item_offset(trans, search_root, path); + ret = fix_item_offset(trans, search_root, &path); if (ret) { btrfs_commit_transaction(trans, search_root); break; } - btrfs_release_path(path); + btrfs_release_path(&path); btrfs_commit_transaction(trans, search_root); } ulist_free(roots); - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } @@ -4498,31 +4424,32 @@ static int check_block(struct btrfs_root *root, return ret; } - static struct tree_backref *find_tree_backref(struct extent_record *rec, u64 parent, u64 root) { - struct rb_node *node; - struct tree_backref *back = NULL; - struct tree_backref match = { - .node = { - .is_data = 0, - }, - }; + struct list_head *cur = rec->backrefs.next; + struct extent_backref *node; + struct tree_backref *back; - if (parent) { - match.parent = parent; - match.node.full_backref = 1; - } else { - match.root = root; + while(cur != &rec->backrefs) { + node = to_extent_backref(cur); + cur = cur->next; + if (node->is_data) + continue; + back = to_tree_backref(node); + if (parent > 0) { + if (!node->full_backref) + continue; + if (parent == back->parent) + return back; + } else { + if (node->full_backref) + continue; + if (back->root == root) + return back; + } } - - node = rb_search(&rec->backref_tree, &match.node.node, - (rb_compare_keys)compare_extent_backref, NULL); - if (node) - back = to_tree_backref(rb_node_to_extent_backref(node)); - - return back; + return NULL; } static struct tree_backref *alloc_tree_backref(struct extent_record *rec, @@ -4540,7 +4467,7 @@ static struct tree_backref *alloc_tree_backref(struct extent_record *rec, ref->root = root; ref->node.full_backref = 0; } - rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref); + list_add_tail(&ref->node.list, &rec->backrefs); return ref; } @@ -4551,32 +4478,35 @@ static struct data_backref *find_data_backref(struct extent_record *rec, int found_ref, u64 disk_bytenr, u64 bytes) { - struct rb_node *node; - struct data_backref *back = NULL; - struct data_backref match = { - .node = { - .is_data = 1, - }, - .owner = owner, - .offset = offset, - .bytes = bytes, - .found_ref = found_ref, - .disk_bytenr = disk_bytenr, - }; + struct list_head *cur = rec->backrefs.next; + struct extent_backref *node; + struct data_backref *back; - if (parent) { - match.parent = parent; - match.node.full_backref = 1; - } else { - match.root = root; + while(cur != &rec->backrefs) { + node = to_extent_backref(cur); + cur = cur->next; + if (!node->is_data) + continue; + back = to_data_backref(node); + if (parent > 0) { + if (!node->full_backref) + continue; + if (parent == back->parent) + return back; + } else { + if (node->full_backref) + continue; + if (back->root == root && back->owner == owner && + back->offset == offset) { + if (found_ref && node->found_ref && + (back->bytes != bytes || + back->disk_bytenr != disk_bytenr)) + continue; + return back; + } + } } - - node = rb_search(&rec->backref_tree, &match.node.node, - (rb_compare_keys)compare_extent_backref, NULL); - if (node) - back = to_data_backref(rb_node_to_extent_backref(node)); - - return back; + return NULL; } static struct data_backref *alloc_data_backref(struct extent_record *rec, @@ -4605,7 +4535,7 @@ static struct data_backref *alloc_data_backref(struct extent_record *rec, ref->bytes = max_size; ref->found_ref = 0; ref->num_refs = 0; - rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref); + list_add_tail(&ref->node.list, &rec->backrefs); if (max_size > rec->max_size) rec->max_size = max_size; return ref; @@ -4638,12 +4568,12 @@ static void check_extent_type(struct extent_record *rec) * Check SYSTEM extent, as it's also marked as metadata, we can only * make sure it's a SYSTEM extent by its backref */ - if (!RB_EMPTY_ROOT(&rec->backref_tree)) { + if (!list_empty(&rec->backrefs)) { struct extent_backref *node; struct tree_backref *tback; u64 bg_type; - node = rb_node_to_extent_backref(rb_first(&rec->backref_tree)); + node = to_extent_backref(rec->backrefs.next); if (node->is_data) { /* tree block shouldn't have data backref */ rec->wrong_chunk_type = 1; @@ -4693,17 +4623,19 @@ static int add_extent_rec_nolookup(struct cache_tree *extent_cache, INIT_LIST_HEAD(&rec->backrefs); INIT_LIST_HEAD(&rec->dups); INIT_LIST_HEAD(&rec->list); - rec->backref_tree = RB_ROOT; memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key)); rec->cache.start = tmpl->start; rec->cache.size = tmpl->nr; ret = insert_cache_extent(extent_cache, &rec->cache); - BUG_ON(ret); + if (ret) { + free(rec); + return ret; + } bytes_used += rec->nr; if (tmpl->metadata) - rec->crossing_stripes = check_crossing_stripes(rec->start, - global_info->tree_root->nodesize); + rec->crossing_stripes = check_crossing_stripes(global_info, + rec->start, global_info->tree_root->nodesize); check_extent_type(rec); return ret; } @@ -4804,7 +4736,8 @@ static int add_extent_rec(struct cache_tree *extent_cache, */ if (tmpl->metadata) rec->crossing_stripes = check_crossing_stripes( - rec->start, global_info->tree_root->nodesize); + global_info, rec->start, + global_info->tree_root->nodesize); check_extent_type(rec); maybe_free_extent_rec(extent_cache, rec); return ret; @@ -4821,6 +4754,7 @@ static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr, struct extent_record *rec; struct tree_backref *back; struct cache_extent *cache; + int ret; cache = lookup_cache_extent(extent_cache, bytenr, 1); if (!cache) { @@ -4831,22 +4765,29 @@ static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr, tmpl.nr = 1; tmpl.metadata = 1; - add_extent_rec_nolookup(extent_cache, &tmpl); + ret = add_extent_rec_nolookup(extent_cache, &tmpl); + if (ret) + return ret; + /* really a bug in cache_extent implement now */ cache = lookup_cache_extent(extent_cache, bytenr, 1); if (!cache) - abort(); + return -ENOENT; } rec = container_of(cache, struct extent_record, cache); if (rec->start != bytenr) { - abort(); + /* + * Several cause, from unaligned bytenr to over lapping extents + */ + return -EEXIST; } back = find_tree_backref(rec, parent, root); if (!back) { back = alloc_tree_backref(rec, parent, root); - BUG_ON(!back); + if (!back) + return -ENOMEM; } if (found_ref) { @@ -4880,6 +4821,7 @@ static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr, struct extent_record *rec; struct data_backref *back; struct cache_extent *cache; + int ret; cache = lookup_cache_extent(extent_cache, bytenr, 1); if (!cache) { @@ -4890,7 +4832,9 @@ static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr, tmpl.nr = 1; tmpl.max_size = max_size; - add_extent_rec_nolookup(extent_cache, &tmpl); + ret = add_extent_rec_nolookup(extent_cache, &tmpl); + if (ret) + return ret; cache = lookup_cache_extent(extent_cache, bytenr, 1); if (!cache) @@ -5123,16 +5067,18 @@ static int process_extent_ref_v0(struct cache_tree *extent_cache, { struct btrfs_extent_ref_v0 *ref0; struct btrfs_key key; + int ret; btrfs_item_key_to_cpu(leaf, &key, slot); ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0); if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) { - add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0); + ret = add_tree_backref(extent_cache, key.objectid, key.offset, + 0, 0); } else { - add_data_backref(extent_cache, key.objectid, key.offset, 0, - 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0); + ret = add_data_backref(extent_cache, key.objectid, key.offset, + 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0); } - return 0; + return ret; } #endif @@ -5194,8 +5140,24 @@ static int process_chunk_item(struct cache_tree *chunk_cache, int slot) { struct chunk_record *rec; + struct btrfs_chunk *chunk; int ret = 0; + chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk); + /* + * Do extra check for this chunk item, + * + * It's still possible one can craft a leaf with CHUNK_ITEM, with + * wrong onwer(3) out of chunk tree, to pass both chunk tree check + * and owner<->key_type check. + */ + ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot, + key->offset); + if (ret < 0) { + error("chunk(%llu, %llu) is not valid, ignore it", + key->offset, btrfs_chunk_length(eb, chunk)); + return 0; + } rec = btrfs_new_chunk_record(eb, key, slot); ret = insert_cache_extent(chunk_cache, &rec->cache); if (ret) { @@ -5359,6 +5321,7 @@ static int process_extent_item(struct btrfs_root *root, struct extent_record tmpl; unsigned long end; unsigned long ptr; + int ret; int type; u32 item_size = btrfs_item_size_nr(eb, slot); u64 refs = 0; @@ -5375,6 +5338,11 @@ static int process_extent_item(struct btrfs_root *root, num_bytes = key.offset; } + if (!IS_ALIGNED(key.objectid, root->sectorsize)) { + error("ignoring invalid extent, bytenr %llu is not aligned to %u", + key.objectid, root->sectorsize); + return -EIO; + } if (item_size < sizeof(*ei)) { #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 struct btrfs_extent_item_v0 *ei0; @@ -5401,6 +5369,16 @@ static int process_extent_item(struct btrfs_root *root, metadata = 1; else metadata = 0; + if (metadata && num_bytes != root->nodesize) { + error("ignore invalid metadata extent, length %llu does not equal to %u", + num_bytes, root->nodesize); + return -EIO; + } + if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) { + error("ignore invalid data extent, length %llu is not aligned to %u", + num_bytes, root->sectorsize); + return -EIO; + } memset(&tmpl, 0, sizeof(tmpl)); tmpl.start = key.objectid; @@ -5423,12 +5401,18 @@ static int process_extent_item(struct btrfs_root *root, offset = btrfs_extent_inline_ref_offset(eb, iref); switch (type) { case BTRFS_TREE_BLOCK_REF_KEY: - add_tree_backref(extent_cache, key.objectid, - 0, offset, 0); + ret = add_tree_backref(extent_cache, key.objectid, + 0, offset, 0); + if (ret < 0) + error("add_tree_backref failed: %s", + strerror(-ret)); break; case BTRFS_SHARED_BLOCK_REF_KEY: - add_tree_backref(extent_cache, key.objectid, - offset, 0, 0); + ret = add_tree_backref(extent_cache, key.objectid, + offset, 0, 0); + if (ret < 0) + error("add_tree_backref failed: %s", + strerror(-ret)); break; case BTRFS_EXTENT_DATA_REF_KEY: dref = (struct btrfs_extent_data_ref *)(&iref->offset); @@ -5484,7 +5468,7 @@ static int check_cache_range(struct btrfs_root *root, continue; if (logical[nr] == offset) { if (stripe_len >= bytes) { - kfree(logical); + free(logical); return 0; } bytes -= stripe_len; @@ -5492,7 +5476,7 @@ static int check_cache_range(struct btrfs_root *root, } else if (logical[nr] < offset) { if (logical[nr] + stripe_len >= offset + bytes) { - kfree(logical); + free(logical); return 0; } bytes = (offset + bytes) - @@ -5515,7 +5499,7 @@ static int check_cache_range(struct btrfs_root *root, offset, logical[nr] - offset); if (ret) { - kfree(logical); + free(logical); return ret; } @@ -5526,7 +5510,7 @@ static int check_cache_range(struct btrfs_root *root, } } - kfree(logical); + free(logical); } entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes); @@ -5556,31 +5540,27 @@ static int check_cache_range(struct btrfs_root *root, static int verify_space_cache(struct btrfs_root *root, struct btrfs_block_group_cache *cache) { - struct btrfs_path *path; + struct btrfs_path path; struct extent_buffer *leaf; struct btrfs_key key; u64 last; int ret = 0; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - root = root->fs_info->extent_root; last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET); + btrfs_init_path(&path); key.objectid = last; key.offset = 0; key.type = BTRFS_EXTENT_ITEM_KEY; - - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); if (ret < 0) goto out; ret = 0; while (1) { - if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { - ret = btrfs_next_leaf(root, path); + if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) { + ret = btrfs_next_leaf(root, &path); if (ret < 0) goto out; if (ret > 0) { @@ -5588,13 +5568,13 @@ static int verify_space_cache(struct btrfs_root *root, break; } } - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + leaf = path.nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); if (key.objectid >= cache->key.offset + cache->key.objectid) break; if (key.type != BTRFS_EXTENT_ITEM_KEY && key.type != BTRFS_METADATA_ITEM_KEY) { - path->slots[0]++; + path.slots[0]++; continue; } @@ -5603,7 +5583,7 @@ static int verify_space_cache(struct btrfs_root *root, last = key.objectid + key.offset; else last = key.objectid + root->nodesize; - path->slots[0]++; + path.slots[0]++; continue; } @@ -5615,7 +5595,7 @@ static int verify_space_cache(struct btrfs_root *root, last = key.objectid + key.offset; else last = key.objectid + root->nodesize; - path->slots[0]++; + path.slots[0]++; } if (last < cache->key.objectid + cache->key.offset) @@ -5624,7 +5604,7 @@ static int verify_space_cache(struct btrfs_root *root, cache->key.offset - last); out: - btrfs_free_path(path); + btrfs_release_path(&path); if (!ret && !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) { @@ -5750,7 +5730,7 @@ again: csum = btrfs_csum_data(NULL, (char *)data + tmp, csum, root->sectorsize); - btrfs_csum_final(csum, (char *)&csum); + btrfs_csum_final(csum, (u8 *)&csum); csum_offset = leaf_offset + tmp / root->sectorsize * csum_size; @@ -5781,33 +5761,28 @@ out: static int check_extent_exists(struct btrfs_root *root, u64 bytenr, u64 num_bytes) { - struct btrfs_path *path; + struct btrfs_path path; struct extent_buffer *leaf; struct btrfs_key key; int ret; - path = btrfs_alloc_path(); - if (!path) { - fprintf(stderr, "Error allocating path\n"); - return -ENOMEM; - } - + btrfs_init_path(&path); key.objectid = bytenr; key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = (u64)-1; again: - ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, + ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path, 0, 0); if (ret < 0) { fprintf(stderr, "Error looking up extent record %d\n", ret); - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } else if (ret) { - if (path->slots[0] > 0) { - path->slots[0]--; + if (path.slots[0] > 0) { + path.slots[0]--; } else { - ret = btrfs_prev_leaf(root, path); + ret = btrfs_prev_leaf(root, &path); if (ret < 0) { goto out; } else if (ret > 0) { @@ -5817,7 +5792,7 @@ again: } } - btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); /* * Block group items come before extent items if they have the same @@ -5828,10 +5803,10 @@ again: * EXTENT_ITEM_KEY please? */ while (key.type > BTRFS_EXTENT_ITEM_KEY) { - if (path->slots[0] > 0) { - path->slots[0]--; + if (path.slots[0] > 0) { + path.slots[0]--; } else { - ret = btrfs_prev_leaf(root, path); + ret = btrfs_prev_leaf(root, &path); if (ret < 0) { goto out; } else if (ret > 0) { @@ -5839,29 +5814,29 @@ again: goto out; } } - btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); } while (num_bytes) { - if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { - ret = btrfs_next_leaf(root, path); + if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) { + ret = btrfs_next_leaf(root, &path); if (ret < 0) { fprintf(stderr, "Error going to next leaf " "%d\n", ret); - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } else if (ret) { break; } } - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + leaf = path.nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); if (key.type != BTRFS_EXTENT_ITEM_KEY) { - path->slots[0]++; + path.slots[0]++; continue; } if (key.objectid + key.offset < bytenr) { - path->slots[0]++; + path.slots[0]++; continue; } if (key.objectid > bytenr + num_bytes) @@ -5894,7 +5869,7 @@ again: * in real life, but no harm in coding it up * anyway just in case. */ - btrfs_release_path(path); + btrfs_release_path(&path); ret = check_extent_exists(root, new_start, new_bytes); if (ret) { @@ -5907,7 +5882,7 @@ again: } num_bytes = key.objectid - bytenr; } - path->slots[0]++; + path.slots[0]++; } ret = 0; @@ -5918,13 +5893,13 @@ out: ret = 1; } - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } static int check_csums(struct btrfs_root *root) { - struct btrfs_path *path; + struct btrfs_path path; struct extent_buffer *leaf; struct btrfs_key key; u64 offset = 0, num_bytes = 0; @@ -5940,28 +5915,24 @@ static int check_csums(struct btrfs_root *root) return -ENOENT; } + btrfs_init_path(&path); key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; key.type = BTRFS_EXTENT_CSUM_KEY; key.offset = 0; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); if (ret < 0) { fprintf(stderr, "Error searching csum tree %d\n", ret); - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } - if (ret > 0 && path->slots[0]) - path->slots[0]--; + if (ret > 0 && path.slots[0]) + path.slots[0]--; ret = 0; while (1) { - if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { - ret = btrfs_next_leaf(root, path); + if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) { + ret = btrfs_next_leaf(root, &path); if (ret < 0) { fprintf(stderr, "Error going to next leaf " "%d\n", ret); @@ -5970,19 +5941,19 @@ static int check_csums(struct btrfs_root *root) if (ret) break; } - leaf = path->nodes[0]; + leaf = path.nodes[0]; - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); if (key.type != BTRFS_EXTENT_CSUM_KEY) { - path->slots[0]++; + path.slots[0]++; continue; } - data_len = (btrfs_item_size_nr(leaf, path->slots[0]) / + data_len = (btrfs_item_size_nr(leaf, path.slots[0]) / csum_size) * root->sectorsize; if (!check_data_csum) goto skip_csum_check; - leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]); + leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]); ret = check_extent_csums(root, key.offset, data_len, leaf_offset, leaf); if (ret) @@ -6002,10 +5973,10 @@ skip_csum_check: num_bytes = 0; } num_bytes += data_len; - path->slots[0]++; + path.slots[0]++; } - btrfs_free_path(path); + btrfs_release_path(&path); return errors; } @@ -6057,7 +6028,9 @@ static int calc_extent_flag(struct btrfs_root *root, cache = lookup_cache_extent(extent_cache, buf->start, 1); /* we have added this extent before */ - BUG_ON(!cache); + if (!cache) + return -ENOENT; + rec = container_of(cache, struct extent_record, cache); /* @@ -6096,6 +6069,58 @@ full_backref: return 0; } +static void report_mismatch_key_root(u8 key_type, u64 rootid) +{ + fprintf(stderr, "Invalid key type("); + print_key_type(stderr, 0, key_type); + fprintf(stderr, ") found in root("); + print_objectid(stderr, rootid, 0); + fprintf(stderr, ")\n"); +} + +/* + * Check if the key is valid with its extent buffer. + * + * This is a early check in case invalid key exists in a extent buffer + * This is not comprehensive yet, but should prevent wrong key/item passed + * further + */ +static int check_type_with_root(u64 rootid, u8 key_type) +{ + switch (key_type) { + /* Only valid in chunk tree */ + case BTRFS_DEV_ITEM_KEY: + case BTRFS_CHUNK_ITEM_KEY: + if (rootid != BTRFS_CHUNK_TREE_OBJECTID) + goto err; + break; + /* valid in csum and log tree */ + case BTRFS_CSUM_TREE_OBJECTID: + if (!(rootid == BTRFS_TREE_LOG_OBJECTID || + is_fstree(rootid))) + goto err; + break; + case BTRFS_EXTENT_ITEM_KEY: + case BTRFS_METADATA_ITEM_KEY: + case BTRFS_BLOCK_GROUP_ITEM_KEY: + if (rootid != BTRFS_EXTENT_TREE_OBJECTID) + goto err; + break; + case BTRFS_ROOT_ITEM_KEY: + if (rootid != BTRFS_ROOT_TREE_OBJECTID) + goto err; + break; + case BTRFS_DEV_EXTENT_KEY: + if (rootid != BTRFS_DEV_TREE_OBJECTID) + goto err; + break; + } + return 0; +err: + report_mismatch_key_root(key_type, rootid); + return -EINVAL; +} + static int run_next_block(struct btrfs_root *root, struct block_info *bits, int bits_nr, @@ -6245,6 +6270,16 @@ static int run_next_block(struct btrfs_root *root, for (i = 0; i < nritems; i++) { struct btrfs_file_extent_item *fi; btrfs_item_key_to_cpu(buf, &key, i); + /* + * Check key type against the leaf owner. + * Could filter quite a lot of early error if + * owner is correct + */ + if (check_type_with_root(btrfs_header_owner(buf), + key.type)) { + fprintf(stderr, "ignoring invalid key\n"); + continue; + } if (key.type == BTRFS_EXTENT_ITEM_KEY) { process_extent_item(root, extent_cache, buf, i); @@ -6289,13 +6324,19 @@ static int run_next_block(struct btrfs_root *root, } if (key.type == BTRFS_TREE_BLOCK_REF_KEY) { - add_tree_backref(extent_cache, key.objectid, 0, - key.offset, 0); + ret = add_tree_backref(extent_cache, + key.objectid, 0, key.offset, 0); + if (ret < 0) + error("add_tree_backref failed: %s", + strerror(-ret)); continue; } if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) { - add_tree_backref(extent_cache, key.objectid, - key.offset, 0, 0); + ret = add_tree_backref(extent_cache, + key.objectid, key.offset, 0, 0); + if (ret < 0) + error("add_tree_backref failed: %s", + strerror(-ret)); continue; } if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { @@ -6393,9 +6434,16 @@ static int run_next_block(struct btrfs_root *root, tmpl.metadata = 1; tmpl.max_size = size; ret = add_extent_rec(extent_cache, &tmpl); - BUG_ON(ret); + if (ret < 0) + goto out; - add_tree_backref(extent_cache, ptr, parent, owner, 1); + ret = add_tree_backref(extent_cache, ptr, parent, + owner, 1); + if (ret < 0) { + error("add_tree_backref failed: %s", + strerror(-ret)); + continue; + } if (level > 1) { add_pending(nodes, seen, ptr, size); @@ -6429,6 +6477,7 @@ static int add_root_to_pending(struct extent_buffer *buf, u64 objectid) { struct extent_record tmpl; + int ret; if (btrfs_header_level(buf) > 0) add_pending(nodes, seen, buf->start, buf->len); @@ -6446,11 +6495,12 @@ static int add_root_to_pending(struct extent_buffer *buf, if (objectid == BTRFS_TREE_RELOC_OBJECTID || btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) - add_tree_backref(extent_cache, buf->start, buf->start, - 0, 1); + ret = add_tree_backref(extent_cache, buf->start, buf->start, + 0, 1); else - add_tree_backref(extent_cache, buf->start, 0, objectid, 1); - return 0; + ret = add_tree_backref(extent_cache, buf->start, 0, objectid, + 1); + return ret; } /* as we fix the tree, we might be deleting blocks that @@ -6496,7 +6546,7 @@ static int free_extent_hook(struct btrfs_trans_handle *trans, back->node.found_extent_tree = 0; if (!back->node.found_extent_tree && back->node.found_ref) { - rb_erase(&back->node.node, &rec->backref_tree); + list_del(&back->node.list); free(back); } } else { @@ -6515,7 +6565,7 @@ static int free_extent_hook(struct btrfs_trans_handle *trans, back->node.found_extent_tree = 0; } if (!back->node.found_extent_tree && back->node.found_ref) { - rb_erase(&back->node.node, &rec->backref_tree); + list_del(&back->node.list); free(back); } } @@ -6620,7 +6670,6 @@ static int record_extent(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; struct btrfs_key ins_key; struct btrfs_extent_item *ei; - struct tree_backref *tback; struct data_backref *dback; struct btrfs_tree_block_info *bi; @@ -6656,7 +6705,6 @@ static int record_extent(struct btrfs_trans_handle *trans, } else { struct btrfs_disk_key copy_key;; - tback = to_tree_backref(back); bi = (struct btrfs_tree_block_info *)(ei + 1); memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi)); @@ -6722,6 +6770,7 @@ static int record_extent(struct btrfs_trans_handle *trans, dback->found_ref); } else { u64 parent; + struct tree_backref *tback; tback = to_tree_backref(back); if (back->full_backref) @@ -6759,11 +6808,6 @@ static struct extent_entry *find_most_right_entry(struct list_head *entries) struct extent_entry *entry, *best = NULL, *prev = NULL; list_for_each_entry(entry, entries, list) { - if (!prev) { - prev = entry; - continue; - } - /* * If there are as many broken entries as entries then we know * not to trust this particular entry. @@ -6772,6 +6816,16 @@ static struct extent_entry *find_most_right_entry(struct list_head *entries) continue; /* + * Special case, when there are only two entries and 'best' is + * the first one + */ + if (!prev) { + best = entry; + prev = entry; + continue; + } + + /* * If our current entry == best then we can't be sure our best * is really the best, so we need to keep searching. */ @@ -6952,7 +7006,7 @@ out: static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path, struct extent_record *rec) { - struct extent_backref *back, *tmp; + struct extent_backref *back; struct data_backref *dback; struct extent_entry *entry, *best = NULL; LIST_HEAD(entries); @@ -6968,8 +7022,7 @@ static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path, if (rec->metadata) return 0; - rbtree_postorder_for_each_entry_safe(back, tmp, - &rec->backref_tree, node) { + list_for_each_entry(back, &rec->backrefs, list) { if (back->full_backref || !back->is_data) continue; @@ -7095,8 +7148,7 @@ static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path, * Ok great we all agreed on an extent record, let's go find the real * references and fix up the ones that don't match. */ - rbtree_postorder_for_each_entry_safe(back, tmp, - &rec->backref_tree, node) { + list_for_each_entry(back, &rec->backrefs, list) { if (back->full_backref || !back->is_data) continue; @@ -7219,17 +7271,13 @@ static int delete_duplicate_records(struct btrfs_root *root, { struct btrfs_trans_handle *trans; LIST_HEAD(delete_list); - struct btrfs_path *path; + struct btrfs_path path; struct extent_record *tmp, *good, *n; int nr_del = 0; int ret = 0, err; struct btrfs_key key; - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out; - } + btrfs_init_path(&path); good = rec; /* Find the record that covers all of the duplicates. */ @@ -7281,16 +7329,16 @@ static int delete_duplicate_records(struct btrfs_root *root, abort(); } - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); if (ret) { if (ret > 0) ret = -EINVAL; break; } - ret = btrfs_del_item(trans, root, path); + ret = btrfs_del_item(trans, root, &path); if (ret) break; - btrfs_release_path(path); + btrfs_release_path(&path); nr_del++; } err = btrfs_commit_transaction(trans, root); @@ -7311,7 +7359,7 @@ out: free(tmp); } - btrfs_free_path(path); + btrfs_release_path(&path); if (!ret && !nr_del) rec->num_duplicates = 0; @@ -7325,7 +7373,7 @@ static int find_possible_backrefs(struct btrfs_fs_info *info, struct extent_record *rec) { struct btrfs_root *root; - struct extent_backref *back, *tmp; + struct extent_backref *back; struct data_backref *dback; struct cache_extent *cache; struct btrfs_file_extent_item *fi; @@ -7333,8 +7381,7 @@ static int find_possible_backrefs(struct btrfs_fs_info *info, u64 bytenr, bytes; int ret; - rbtree_postorder_for_each_entry_safe(back, tmp, - &rec->backref_tree, node) { + list_for_each_entry(back, &rec->backrefs, list) { /* Don't care about full backrefs (poor unloved backrefs) */ if (back->full_backref || !back->is_data) continue; @@ -7422,20 +7469,17 @@ static int record_orphan_data_extents(struct btrfs_fs_info *fs_info, { struct btrfs_key key; struct btrfs_root *dest_root; - struct extent_backref *back, *tmp; + struct extent_backref *back; struct data_backref *dback; struct orphan_data_extent *orphan; - struct btrfs_path *path; + struct btrfs_path path; int recorded_data_ref = 0; int ret = 0; if (rec->metadata) return 1; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - rbtree_postorder_for_each_entry_safe(back, tmp, - &rec->backref_tree, node) { + btrfs_init_path(&path); + list_for_each_entry(back, &rec->backrefs, list) { if (back->full_backref || !back->is_data || !back->found_extent_tree) continue; @@ -7456,7 +7500,8 @@ static int record_orphan_data_extents(struct btrfs_fs_info *fs_info, key.type = BTRFS_EXTENT_DATA_KEY; key.offset = dback->offset; - ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0); + btrfs_release_path(&path); /* * For ret < 0, it's OK since the fs-tree may be corrupted, * we need to record it for inode/file extent rebuild. @@ -7483,7 +7528,7 @@ static int record_orphan_data_extents(struct btrfs_fs_info *fs_info, recorded_data_ref = 1; } out: - btrfs_free_path(path); + btrfs_release_path(&path); if (!ret) return !recorded_data_ref; else @@ -7501,19 +7546,17 @@ static int fixup_extent_refs(struct btrfs_fs_info *info, { struct btrfs_trans_handle *trans = NULL; int ret; - struct btrfs_path *path; + struct btrfs_path path; + struct list_head *cur = rec->backrefs.next; struct cache_extent *cache; - struct extent_backref *back, *tmp; + struct extent_backref *back; int allocated = 0; u64 flags = 0; if (rec->flag_block_full_backref) flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - + btrfs_init_path(&path); if (rec->refs != rec->extent_item_refs && !rec->metadata) { /* * Sometimes the backrefs themselves are so broken they don't @@ -7522,13 +7565,13 @@ static int fixup_extent_refs(struct btrfs_fs_info *info, * them into the list if we find the backref so that * verify_backrefs can figure out what to do. */ - ret = find_possible_backrefs(info, path, extent_cache, rec); + ret = find_possible_backrefs(info, &path, extent_cache, rec); if (ret < 0) goto out; } /* step one, make sure all of the backrefs agree */ - ret = verify_backrefs(info, path, rec); + ret = verify_backrefs(info, &path, rec); if (ret < 0) goto out; @@ -7539,7 +7582,7 @@ static int fixup_extent_refs(struct btrfs_fs_info *info, } /* step two, delete all the existing records */ - ret = delete_extent_records(trans, info->extent_root, path, + ret = delete_extent_records(trans, info->extent_root, &path, rec->start, rec->max_size); if (ret < 0) @@ -7554,8 +7597,10 @@ static int fixup_extent_refs(struct btrfs_fs_info *info, } /* step three, recreate all the refs we did find */ - rbtree_postorder_for_each_entry_safe(back, tmp, - &rec->backref_tree, node) { + while(cur != &rec->backrefs) { + back = to_extent_backref(cur); + cur = cur->next; + /* * if we didn't find any references, don't create a * new extent record @@ -7564,7 +7609,7 @@ static int fixup_extent_refs(struct btrfs_fs_info *info, continue; rec->bad_full_backref = 0; - ret = record_extent(trans, info, path, rec, back, allocated, flags); + ret = record_extent(trans, info, &path, rec, back, allocated, flags); allocated = 1; if (ret) @@ -7577,7 +7622,7 @@ out: ret = err; } - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } @@ -7586,7 +7631,7 @@ static int fixup_extent_flags(struct btrfs_fs_info *fs_info, { struct btrfs_trans_handle *trans; struct btrfs_root *root = fs_info->extent_root; - struct btrfs_path *path; + struct btrfs_path path; struct btrfs_extent_item *ei; struct btrfs_key key; u64 flags; @@ -7601,32 +7646,27 @@ static int fixup_extent_flags(struct btrfs_fs_info *fs_info, key.offset = rec->max_size; } - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - trans = btrfs_start_transaction(root, 0); - if (IS_ERR(trans)) { - btrfs_free_path(path); + if (IS_ERR(trans)) return PTR_ERR(trans); - } - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + btrfs_init_path(&path); + ret = btrfs_search_slot(trans, root, &key, &path, 0, 1); if (ret < 0) { - btrfs_free_path(path); + btrfs_release_path(&path); btrfs_commit_transaction(trans, root); return ret; } else if (ret) { fprintf(stderr, "Didn't find extent for %llu\n", (unsigned long long)rec->start); - btrfs_free_path(path); + btrfs_release_path(&path); btrfs_commit_transaction(trans, root); return -ENOENT; } - ei = btrfs_item_ptr(path->nodes[0], path->slots[0], + ei = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_extent_item); - flags = btrfs_extent_flags(path->nodes[0], ei); + flags = btrfs_extent_flags(path.nodes[0], ei); if (rec->flag_block_full_backref) { fprintf(stderr, "setting full backref on %llu\n", (unsigned long long)key.objectid); @@ -7636,9 +7676,9 @@ static int fixup_extent_flags(struct btrfs_fs_info *fs_info, (unsigned long long)key.objectid); flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF; } - btrfs_set_extent_flags(path->nodes[0], ei, flags); - btrfs_mark_buffer_dirty(path->nodes[0]); - btrfs_free_path(path); + btrfs_set_extent_flags(path.nodes[0], ei, flags); + btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_release_path(&path); return btrfs_commit_transaction(trans, root); } @@ -8301,8 +8341,10 @@ static int deal_root_from_list(struct list_head *list, ret = -EIO; break; } - add_root_to_pending(buf, extent_cache, pending, + ret = add_root_to_pending(buf, extent_cache, pending, seen, nodes, rec->objectid); + if (ret < 0) + break; /* * To rebuild extent tree, we need deal with snapshot * one by one, otherwise we deal with node firstly which @@ -8418,7 +8460,7 @@ again: btrfs_init_path(&path); key.offset = 0; key.objectid = 0; - btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + key.type = BTRFS_ROOT_ITEM_KEY; ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path, 0, 0); if (ret < 0) @@ -8434,7 +8476,7 @@ again: slot = path.slots[0]; } btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]); - if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) { + if (found_key.type == BTRFS_ROOT_ITEM_KEY) { unsigned long offset; u64 last_snapshot; @@ -8995,9 +9037,10 @@ static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id, free_extent_buffer(eb); btrfs_init_path(&path); + path.lowest_level = level; /* Search with the first key, to ensure we can reach it */ ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); - if (ret) { + if (ret < 0) { err |= REFERENCER_MISSING; goto release_out; } @@ -9034,106 +9077,1085 @@ out: return err; } -static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans, - struct btrfs_root *root, int overwrite) +/* + * Check referencer for shared block backref + * If level == -1, this function will resolve the level. + */ +static int check_shared_block_backref(struct btrfs_fs_info *fs_info, + u64 parent, u64 bytenr, int level) { - struct extent_buffer *c; - struct extent_buffer *old = root->node; - int level; - int ret; - struct btrfs_disk_key disk_key = {0,0,0}; + struct extent_buffer *eb; + u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); + u32 nr; + int found_parent = 0; + int i; - level = 0; + eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0); + if (!extent_buffer_uptodate(eb)) + goto out; - if (overwrite) { - c = old; - extent_buffer_get(c); - goto init; + if (level == -1) + level = query_tree_block_level(fs_info, bytenr); + if (level < 0) + goto out; + + if (level + 1 != btrfs_header_level(eb)) + goto out; + + nr = btrfs_header_nritems(eb); + for (i = 0; i < nr; i++) { + if (bytenr == btrfs_node_blockptr(eb, i)) { + found_parent = 1; + break; + } } - c = btrfs_alloc_free_block(trans, root, - root->nodesize, - root->root_key.objectid, - &disk_key, level, 0, 0); - if (IS_ERR(c)) { - c = old; - extent_buffer_get(c); - overwrite = 1; +out: + free_extent_buffer(eb); + if (!found_parent) { + error( + "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)", + bytenr, nodesize, parent, level); + return REFERENCER_MISSING; } -init: - memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); - btrfs_set_header_level(c, level); - btrfs_set_header_bytenr(c, c->start); - btrfs_set_header_generation(c, trans->transid); - btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV); - btrfs_set_header_owner(c, root->root_key.objectid); + return 0; +} - write_extent_buffer(c, root->fs_info->fsid, - btrfs_header_fsid(), BTRFS_FSID_SIZE); +/* + * Check referencer for normal (inlined) data ref + * If len == 0, it will be resolved by searching in extent tree + */ +static int check_extent_data_backref(struct btrfs_fs_info *fs_info, + u64 root_id, u64 objectid, u64 offset, + u64 bytenr, u64 len, u32 count) +{ + struct btrfs_root *root; + struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_key key; + struct btrfs_path path; + struct extent_buffer *leaf; + struct btrfs_file_extent_item *fi; + u32 found_count = 0; + int slot; + int ret = 0; - write_extent_buffer(c, root->fs_info->chunk_tree_uuid, - btrfs_header_chunk_tree_uuid(c), - BTRFS_UUID_SIZE); + if (!len) { + key.objectid = bytenr; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = (u64)-1; - btrfs_mark_buffer_dirty(c); + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); + if (ret < 0) + goto out; + ret = btrfs_previous_extent_item(extent_root, &path, bytenr); + if (ret) + goto out; + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + if (key.objectid != bytenr || + key.type != BTRFS_EXTENT_ITEM_KEY) + goto out; + len = key.offset; + btrfs_release_path(&path); + } + key.objectid = root_id; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + btrfs_init_path(&path); + + root = btrfs_read_fs_root(fs_info, &key); + if (IS_ERR(root)) + goto out; + + key.objectid = objectid; + key.type = BTRFS_EXTENT_DATA_KEY; /* - * this case can happen in the following case: - * - * 1.overwrite previous root. - * - * 2.reinit reloc data root, this is because we skip pin - * down reloc data tree before which means we can allocate - * same block bytenr here. + * It can be nasty as data backref offset is + * file offset - file extent offset, which is smaller or + * equal to original backref offset. The only special case is + * overflow. So we need to special check and do further search. */ - if (old->start == c->start) { - btrfs_set_root_generation(&root->root_item, - trans->transid); - root->root_item.level = btrfs_header_level(root->node); - ret = btrfs_update_root(trans, root->fs_info->tree_root, - &root->root_key, &root->root_item); - if (ret) { - free_extent_buffer(c); - return ret; - } + key.offset = offset & (1ULL << 63) ? 0 : offset; + + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret < 0) + goto out; + + /* + * Search afterwards to get correct one + * NOTE: As we must do a comprehensive check on the data backref to + * make sure the dref count also matches, we must iterate all file + * extents for that inode. + */ + while (1) { + leaf = path.nodes[0]; + slot = path.slots[0]; + + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY) + break; + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); + /* + * Except normal disk bytenr and disk num bytes, we still + * need to do extra check on dbackref offset as + * dbackref offset = file_offset - file_extent_offset + */ + if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr && + btrfs_file_extent_disk_num_bytes(leaf, fi) == len && + (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) == + offset) + found_count++; + + ret = btrfs_next_item(root, &path); + if (ret) + break; + } +out: + btrfs_release_path(&path); + if (found_count != count) { + error( +"extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u", + bytenr, len, root_id, objectid, offset, count, found_count); + return REFERENCER_MISSING; } - free_extent_buffer(old); - root->node = c; - add_root_to_dirty_list(root); return 0; } -static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int tree_root) +/* + * Check if the referencer of a shared data backref exists + */ +static int check_shared_data_backref(struct btrfs_fs_info *fs_info, + u64 parent, u64 bytenr) { - struct extent_buffer *tmp; - struct btrfs_root_item *ri; + struct extent_buffer *eb; struct btrfs_key key; - u64 bytenr; - u32 nodesize; - int level = btrfs_header_level(eb); - int nritems; - int ret; + struct btrfs_file_extent_item *fi; + u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); + u32 nr; + int found_parent = 0; int i; - /* - * If we have pinned this block before, don't pin it again. - * This can not only avoid forever loop with broken filesystem - * but also give us some speedups. - */ - if (test_range_bit(&fs_info->pinned_extents, eb->start, - eb->start + eb->len - 1, EXTENT_DIRTY, 0)) - return 0; + eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0); + if (!extent_buffer_uptodate(eb)) + goto out; - btrfs_pin_extent(fs_info, eb->start, eb->len); + nr = btrfs_header_nritems(eb); + for (i = 0; i < nr; i++) { + btrfs_item_key_to_cpu(eb, &key, i); + if (key.type != BTRFS_EXTENT_DATA_KEY) + continue; - nodesize = btrfs_super_nodesize(fs_info->super_copy); - nritems = btrfs_header_nritems(eb); - for (i = 0; i < nritems; i++) { - if (level == 0) { - btrfs_item_key_to_cpu(eb, &key, i); - if (key.type != BTRFS_ROOT_ITEM_KEY) - continue; - /* Skip the extent root and reloc roots */ - if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID || + fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item); + if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE) + continue; + + if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) { + found_parent = 1; + break; + } + } + +out: + free_extent_buffer(eb); + if (!found_parent) { + error("shared extent %llu referencer lost (parent: %llu)", + bytenr, parent); + return REFERENCER_MISSING; + } + return 0; +} + +/* + * This function will check a given extent item, including its backref and + * itself (like crossing stripe boundary and type) + * + * Since we don't use extent_record anymore, introduce new error bit + */ +static int check_extent_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot) +{ + struct btrfs_extent_item *ei; + struct btrfs_extent_inline_ref *iref; + struct btrfs_extent_data_ref *dref; + unsigned long end; + unsigned long ptr; + int type; + u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); + u32 item_size = btrfs_item_size_nr(eb, slot); + u64 flags; + u64 offset; + int metadata = 0; + int level; + struct btrfs_key key; + int ret; + int err = 0; + + btrfs_item_key_to_cpu(eb, &key, slot); + if (key.type == BTRFS_EXTENT_ITEM_KEY) + bytes_used += key.offset; + else + bytes_used += nodesize; + + if (item_size < sizeof(*ei)) { + /* + * COMPAT_EXTENT_TREE_V0 case, but it's already a super + * old thing when on disk format is still un-determined. + * No need to care about it anymore + */ + error("unsupported COMPAT_EXTENT_TREE_V0 detected"); + return -ENOTTY; + } + + ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item); + flags = btrfs_extent_flags(eb, ei); + + if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) + metadata = 1; + if (metadata && check_crossing_stripes(global_info, key.objectid, + eb->len)) { + error("bad metadata [%llu, %llu) crossing stripe boundary", + key.objectid, key.objectid + nodesize); + err |= CROSSING_STRIPE_BOUNDARY; + } + + ptr = (unsigned long)(ei + 1); + + if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) { + /* Old EXTENT_ITEM metadata */ + struct btrfs_tree_block_info *info; + + info = (struct btrfs_tree_block_info *)ptr; + level = btrfs_tree_block_level(eb, info); + ptr += sizeof(struct btrfs_tree_block_info); + } else { + /* New METADATA_ITEM */ + level = key.offset; + } + end = (unsigned long)ei + item_size; + + if (ptr >= end) { + err |= ITEM_SIZE_MISMATCH; + goto out; + } + + /* Now check every backref in this extent item */ +next: + iref = (struct btrfs_extent_inline_ref *)ptr; + type = btrfs_extent_inline_ref_type(eb, iref); + offset = btrfs_extent_inline_ref_offset(eb, iref); + switch (type) { + case BTRFS_TREE_BLOCK_REF_KEY: + ret = check_tree_block_backref(fs_info, offset, key.objectid, + level); + err |= ret; + break; + case BTRFS_SHARED_BLOCK_REF_KEY: + ret = check_shared_block_backref(fs_info, offset, key.objectid, + level); + err |= ret; + break; + case BTRFS_EXTENT_DATA_REF_KEY: + dref = (struct btrfs_extent_data_ref *)(&iref->offset); + ret = check_extent_data_backref(fs_info, + btrfs_extent_data_ref_root(eb, dref), + btrfs_extent_data_ref_objectid(eb, dref), + btrfs_extent_data_ref_offset(eb, dref), + key.objectid, key.offset, + btrfs_extent_data_ref_count(eb, dref)); + err |= ret; + break; + case BTRFS_SHARED_DATA_REF_KEY: + ret = check_shared_data_backref(fs_info, offset, key.objectid); + err |= ret; + break; + default: + error("extent[%llu %d %llu] has unknown ref type: %d", + key.objectid, key.type, key.offset, type); + err |= UNKNOWN_TYPE; + goto out; + } + + ptr += btrfs_extent_inline_ref_size(type); + if (ptr < end) + goto next; + +out: + return err; +} + +/* + * Check if a dev extent item is referred correctly by its chunk + */ +static int check_dev_extent_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot) +{ + struct btrfs_root *chunk_root = fs_info->chunk_root; + struct btrfs_dev_extent *ptr; + struct btrfs_path path; + struct btrfs_key chunk_key; + struct btrfs_key devext_key; + struct btrfs_chunk *chunk; + struct extent_buffer *l; + int num_stripes; + u64 length; + int i; + int found_chunk = 0; + int ret; + + btrfs_item_key_to_cpu(eb, &devext_key, slot); + ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent); + length = btrfs_dev_extent_length(eb, ptr); + + chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr); + chunk_key.type = BTRFS_CHUNK_ITEM_KEY; + chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr); + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0); + if (ret) + goto out; + + l = path.nodes[0]; + chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk); + if (btrfs_chunk_length(l, chunk) != length) + goto out; + + num_stripes = btrfs_chunk_num_stripes(l, chunk); + for (i = 0; i < num_stripes; i++) { + u64 devid = btrfs_stripe_devid_nr(l, chunk, i); + u64 offset = btrfs_stripe_offset_nr(l, chunk, i); + + if (devid == devext_key.objectid && + offset == devext_key.offset) { + found_chunk = 1; + break; + } + } +out: + btrfs_release_path(&path); + if (!found_chunk) { + error( + "device extent[%llu, %llu, %llu] did not find the related chunk", + devext_key.objectid, devext_key.offset, length); + return REFERENCER_MISSING; + } + return 0; +} + +/* + * Check if the used space is correct with the dev item + */ +static int check_dev_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot) +{ + struct btrfs_root *dev_root = fs_info->dev_root; + struct btrfs_dev_item *dev_item; + struct btrfs_path path; + struct btrfs_key key; + struct btrfs_dev_extent *ptr; + u64 dev_id; + u64 used; + u64 total = 0; + int ret; + + dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item); + dev_id = btrfs_device_id(eb, dev_item); + used = btrfs_device_bytes_used(eb, dev_item); + + key.objectid = dev_id; + key.type = BTRFS_DEV_EXTENT_KEY; + key.offset = 0; + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0); + if (ret < 0) { + btrfs_item_key_to_cpu(eb, &key, slot); + error("cannot find any related dev extent for dev[%llu, %u, %llu]", + key.objectid, key.type, key.offset); + btrfs_release_path(&path); + return REFERENCER_MISSING; + } + + /* Iterate dev_extents to calculate the used space of a device */ + while (1) { + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + + if (key.objectid > dev_id) + break; + if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id) + goto next; + + ptr = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_dev_extent); + total += btrfs_dev_extent_length(path.nodes[0], ptr); +next: + ret = btrfs_next_item(dev_root, &path); + if (ret) + break; + } + btrfs_release_path(&path); + + if (used != total) { + btrfs_item_key_to_cpu(eb, &key, slot); + error( +"Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]", + total, used, BTRFS_ROOT_TREE_OBJECTID, + BTRFS_DEV_EXTENT_KEY, dev_id); + return ACCOUNTING_MISMATCH; + } + return 0; +} + +/* + * Check a block group item with its referener (chunk) and its used space + * with extent/metadata item + */ +static int check_block_group_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot) +{ + struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_root *chunk_root = fs_info->chunk_root; + struct btrfs_block_group_item *bi; + struct btrfs_block_group_item bg_item; + struct btrfs_path path; + struct btrfs_key bg_key; + struct btrfs_key chunk_key; + struct btrfs_key extent_key; + struct btrfs_chunk *chunk; + struct extent_buffer *leaf; + struct btrfs_extent_item *ei; + u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); + u64 flags; + u64 bg_flags; + u64 used; + u64 total = 0; + int ret; + int err = 0; + + btrfs_item_key_to_cpu(eb, &bg_key, slot); + bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item); + read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item)); + used = btrfs_block_group_used(&bg_item); + bg_flags = btrfs_block_group_flags(&bg_item); + + chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + chunk_key.type = BTRFS_CHUNK_ITEM_KEY; + chunk_key.offset = bg_key.objectid; + + btrfs_init_path(&path); + /* Search for the referencer chunk */ + ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0); + if (ret) { + error( + "block group[%llu %llu] did not find the related chunk item", + bg_key.objectid, bg_key.offset); + err |= REFERENCER_MISSING; + } else { + chunk = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_chunk); + if (btrfs_chunk_length(path.nodes[0], chunk) != + bg_key.offset) { + error( + "block group[%llu %llu] related chunk item length does not match", + bg_key.objectid, bg_key.offset); + err |= REFERENCER_MISMATCH; + } + } + btrfs_release_path(&path); + + /* Search from the block group bytenr */ + extent_key.objectid = bg_key.objectid; + extent_key.type = 0; + extent_key.offset = 0; + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0); + if (ret < 0) + goto out; + + /* Iterate extent tree to account used space */ + while (1) { + leaf = path.nodes[0]; + btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]); + if (extent_key.objectid >= bg_key.objectid + bg_key.offset) + break; + + if (extent_key.type != BTRFS_METADATA_ITEM_KEY && + extent_key.type != BTRFS_EXTENT_ITEM_KEY) + goto next; + if (extent_key.objectid < bg_key.objectid) + goto next; + + if (extent_key.type == BTRFS_METADATA_ITEM_KEY) + total += nodesize; + else + total += extent_key.offset; + + ei = btrfs_item_ptr(leaf, path.slots[0], + struct btrfs_extent_item); + flags = btrfs_extent_flags(leaf, ei); + if (flags & BTRFS_EXTENT_FLAG_DATA) { + if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) { + error( + "bad extent[%llu, %llu) type mismatch with chunk", + extent_key.objectid, + extent_key.objectid + extent_key.offset); + err |= CHUNK_TYPE_MISMATCH; + } + } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { + if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM | + BTRFS_BLOCK_GROUP_METADATA))) { + error( + "bad extent[%llu, %llu) type mismatch with chunk", + extent_key.objectid, + extent_key.objectid + nodesize); + err |= CHUNK_TYPE_MISMATCH; + } + } +next: + ret = btrfs_next_item(extent_root, &path); + if (ret) + break; + } + +out: + btrfs_release_path(&path); + + if (total != used) { + error( + "block group[%llu %llu] used %llu but extent items used %llu", + bg_key.objectid, bg_key.offset, used, total); + err |= ACCOUNTING_MISMATCH; + } + return err; +} + +/* + * Check a chunk item. + * Including checking all referred dev_extents and block group + */ +static int check_chunk_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot) +{ + struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_root *dev_root = fs_info->dev_root; + struct btrfs_path path; + struct btrfs_key chunk_key; + struct btrfs_key bg_key; + struct btrfs_key devext_key; + struct btrfs_chunk *chunk; + struct extent_buffer *leaf; + struct btrfs_block_group_item *bi; + struct btrfs_block_group_item bg_item; + struct btrfs_dev_extent *ptr; + u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy); + u64 length; + u64 chunk_end; + u64 type; + u64 profile; + int num_stripes; + u64 offset; + u64 objectid; + int i; + int ret; + int err = 0; + + btrfs_item_key_to_cpu(eb, &chunk_key, slot); + chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk); + length = btrfs_chunk_length(eb, chunk); + chunk_end = chunk_key.offset + length; + if (!IS_ALIGNED(length, sectorsize)) { + error("chunk[%llu %llu) not aligned to %u", + chunk_key.offset, chunk_end, sectorsize); + err |= BYTES_UNALIGNED; + goto out; + } + + type = btrfs_chunk_type(eb, chunk); + profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK; + if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { + error("chunk[%llu %llu) has no chunk type", + chunk_key.offset, chunk_end); + err |= UNKNOWN_TYPE; + } + if (profile && (profile & (profile - 1))) { + error("chunk[%llu %llu) multiple profiles detected: %llx", + chunk_key.offset, chunk_end, profile); + err |= UNKNOWN_TYPE; + } + + bg_key.objectid = chunk_key.offset; + bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; + bg_key.offset = length; + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0); + if (ret) { + error( + "chunk[%llu %llu) did not find the related block group item", + chunk_key.offset, chunk_end); + err |= REFERENCER_MISSING; + } else{ + leaf = path.nodes[0]; + bi = btrfs_item_ptr(leaf, path.slots[0], + struct btrfs_block_group_item); + read_extent_buffer(leaf, &bg_item, (unsigned long)bi, + sizeof(bg_item)); + if (btrfs_block_group_flags(&bg_item) != type) { + error( +"chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu", + chunk_key.offset, chunk_end, type, + btrfs_block_group_flags(&bg_item)); + err |= REFERENCER_MISSING; + } + } + + num_stripes = btrfs_chunk_num_stripes(eb, chunk); + for (i = 0; i < num_stripes; i++) { + btrfs_release_path(&path); + btrfs_init_path(&path); + devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i); + devext_key.type = BTRFS_DEV_EXTENT_KEY; + devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i); + + ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path, + 0, 0); + if (ret) + goto not_match_dev; + + leaf = path.nodes[0]; + ptr = btrfs_item_ptr(leaf, path.slots[0], + struct btrfs_dev_extent); + objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr); + offset = btrfs_dev_extent_chunk_offset(leaf, ptr); + if (objectid != chunk_key.objectid || + offset != chunk_key.offset || + btrfs_dev_extent_length(leaf, ptr) != length) + goto not_match_dev; + continue; +not_match_dev: + err |= BACKREF_MISSING; + error( + "chunk[%llu %llu) stripe %d did not find the related dev extent", + chunk_key.objectid, chunk_end, i); + continue; + } + btrfs_release_path(&path); +out: + return err; +} + +/* + * Main entry function to check known items and update related accounting info + */ +static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_key key; + int slot = 0; + int type; + struct btrfs_extent_data_ref *dref; + int ret; + int err = 0; + +next: + btrfs_item_key_to_cpu(eb, &key, slot); + type = key.type; + + switch (type) { + case BTRFS_EXTENT_DATA_KEY: + ret = check_extent_data_item(root, eb, slot); + err |= ret; + break; + case BTRFS_BLOCK_GROUP_ITEM_KEY: + ret = check_block_group_item(fs_info, eb, slot); + err |= ret; + break; + case BTRFS_DEV_ITEM_KEY: + ret = check_dev_item(fs_info, eb, slot); + err |= ret; + break; + case BTRFS_CHUNK_ITEM_KEY: + ret = check_chunk_item(fs_info, eb, slot); + err |= ret; + break; + case BTRFS_DEV_EXTENT_KEY: + ret = check_dev_extent_item(fs_info, eb, slot); + err |= ret; + break; + case BTRFS_EXTENT_ITEM_KEY: + case BTRFS_METADATA_ITEM_KEY: + ret = check_extent_item(fs_info, eb, slot); + err |= ret; + break; + case BTRFS_EXTENT_CSUM_KEY: + total_csum_bytes += btrfs_item_size_nr(eb, slot); + break; + case BTRFS_TREE_BLOCK_REF_KEY: + ret = check_tree_block_backref(fs_info, key.offset, + key.objectid, -1); + err |= ret; + break; + case BTRFS_EXTENT_DATA_REF_KEY: + dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref); + ret = check_extent_data_backref(fs_info, + btrfs_extent_data_ref_root(eb, dref), + btrfs_extent_data_ref_objectid(eb, dref), + btrfs_extent_data_ref_offset(eb, dref), + key.objectid, 0, + btrfs_extent_data_ref_count(eb, dref)); + err |= ret; + break; + case BTRFS_SHARED_BLOCK_REF_KEY: + ret = check_shared_block_backref(fs_info, key.offset, + key.objectid, -1); + err |= ret; + break; + case BTRFS_SHARED_DATA_REF_KEY: + ret = check_shared_data_backref(fs_info, key.offset, + key.objectid); + err |= ret; + break; + default: + break; + } + + if (++slot < btrfs_header_nritems(eb)) + goto next; + + return err; +} + +/* + * Helper function for later fs/subvol tree check. To determine if a tree + * block should be checked. + * This function will ensure only the direct referencer with lowest rootid to + * check a fs/subvolume tree block. + * + * Backref check at extent tree would detect errors like missing subvolume + * tree, so we can do aggressive check to reduce duplicated checks. + */ +static int should_check(struct btrfs_root *root, struct extent_buffer *eb) +{ + struct btrfs_root *extent_root = root->fs_info->extent_root; + struct btrfs_key key; + struct btrfs_path path; + struct extent_buffer *leaf; + int slot; + struct btrfs_extent_item *ei; + unsigned long ptr; + unsigned long end; + int type; + u32 item_size; + u64 offset; + struct btrfs_extent_inline_ref *iref; + int ret; + + btrfs_init_path(&path); + key.objectid = btrfs_header_bytenr(eb); + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = (u64)-1; + + /* + * Any failure in backref resolving means we can't determine + * whom the tree block belongs to. + * So in that case, we need to check that tree block + */ + ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); + if (ret < 0) + goto need_check; + + ret = btrfs_previous_extent_item(extent_root, &path, + btrfs_header_bytenr(eb)); + if (ret) + goto need_check; + + leaf = path.nodes[0]; + slot = path.slots[0]; + btrfs_item_key_to_cpu(leaf, &key, slot); + ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); + + if (key.type == BTRFS_METADATA_ITEM_KEY) { + iref = (struct btrfs_extent_inline_ref *)(ei + 1); + } else { + struct btrfs_tree_block_info *info; + + info = (struct btrfs_tree_block_info *)(ei + 1); + iref = (struct btrfs_extent_inline_ref *)(info + 1); + } + + item_size = btrfs_item_size_nr(leaf, slot); + ptr = (unsigned long)iref; + end = (unsigned long)ei + item_size; + while (ptr < end) { + iref = (struct btrfs_extent_inline_ref *)ptr; + type = btrfs_extent_inline_ref_type(leaf, iref); + offset = btrfs_extent_inline_ref_offset(leaf, iref); + + /* + * We only check the tree block if current root is + * the lowest referencer of it. + */ + if (type == BTRFS_TREE_BLOCK_REF_KEY && + offset < root->objectid) { + btrfs_release_path(&path); + return 0; + } + + ptr += btrfs_extent_inline_ref_size(type); + } + /* + * Normally we should also check keyed tree block ref, but that may be + * very time consuming. Inlined ref should already make us skip a lot + * of refs now. So skip search keyed tree block ref. + */ + +need_check: + btrfs_release_path(&path); + return 1; +} + +/* + * Traversal function for tree block. We will do: + * 1) Skip shared fs/subvolume tree blocks + * 2) Update related bytes accounting + * 3) Pre-order traversal + */ +static int traverse_tree_block(struct btrfs_root *root, + struct extent_buffer *node) +{ + struct extent_buffer *eb; + struct btrfs_key key; + struct btrfs_key drop_key; + int level; + u64 nr; + int i; + int err = 0; + int ret; + + /* + * Skip shared fs/subvolume tree block, in that case they will + * be checked by referencer with lowest rootid + */ + if (is_fstree(root->objectid) && !should_check(root, node)) + return 0; + + /* Update bytes accounting */ + total_btree_bytes += node->len; + if (fs_root_objectid(btrfs_header_owner(node))) + total_fs_tree_bytes += node->len; + if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID) + total_extent_tree_bytes += node->len; + if (!found_old_backref && + btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID && + btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV && + !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC)) + found_old_backref = 1; + + /* pre-order tranversal, check itself first */ + level = btrfs_header_level(node); + ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node), + btrfs_header_level(node), + btrfs_header_owner(node)); + err |= ret; + if (err) + error( + "check %s failed root %llu bytenr %llu level %d, force continue check", + level ? "node":"leaf", root->objectid, + btrfs_header_bytenr(node), btrfs_header_level(node)); + + if (!level) { + btree_space_waste += btrfs_leaf_free_space(root, node); + ret = check_leaf_items(root, node); + err |= ret; + return err; + } + + nr = btrfs_header_nritems(node); + btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress); + btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) * + sizeof(struct btrfs_key_ptr); + + /* Then check all its children */ + for (i = 0; i < nr; i++) { + u64 blocknr = btrfs_node_blockptr(node, i); + + btrfs_node_key_to_cpu(node, &key, i); + if (level == root->root_item.drop_level && + is_dropped_key(&key, &drop_key)) + continue; + + /* + * As a btrfs tree has most 8 levels (0..7), so it's quite safe + * to call the function itself. + */ + eb = read_tree_block(root, blocknr, root->nodesize, 0); + if (extent_buffer_uptodate(eb)) { + ret = traverse_tree_block(root, eb); + err |= ret; + } + free_extent_buffer(eb); + } + + return err; +} + +/* + * Low memory usage version check_chunks_and_extents. + */ +static int check_chunks_and_extents_v2(struct btrfs_root *root) +{ + struct btrfs_path path; + struct btrfs_key key; + struct btrfs_root *root1; + struct btrfs_root *cur_root; + int err = 0; + int ret; + + root1 = root->fs_info->chunk_root; + ret = traverse_tree_block(root1, root1->node); + err |= ret; + + root1 = root->fs_info->tree_root; + ret = traverse_tree_block(root1, root1->node); + err |= ret; + + btrfs_init_path(&path); + key.objectid = BTRFS_EXTENT_TREE_OBJECTID; + key.offset = 0; + key.type = BTRFS_ROOT_ITEM_KEY; + + ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0); + if (ret) { + error("cannot find extent treet in tree_root"); + goto out; + } + + while (1) { + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + if (key.type != BTRFS_ROOT_ITEM_KEY) + goto next; + key.offset = (u64)-1; + + cur_root = btrfs_read_fs_root(root->fs_info, &key); + if (IS_ERR(cur_root) || !cur_root) { + error("failed to read tree: %lld", key.objectid); + goto next; + } + + ret = traverse_tree_block(cur_root, cur_root->node); + err |= ret; + +next: + ret = btrfs_next_item(root1, &path); + if (ret) + goto out; + } + +out: + btrfs_release_path(&path); + return err; +} + +static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root, int overwrite) +{ + struct extent_buffer *c; + struct extent_buffer *old = root->node; + int level; + int ret; + struct btrfs_disk_key disk_key = {0,0,0}; + + level = 0; + + if (overwrite) { + c = old; + extent_buffer_get(c); + goto init; + } + c = btrfs_alloc_free_block(trans, root, + root->nodesize, + root->root_key.objectid, + &disk_key, level, 0, 0); + if (IS_ERR(c)) { + c = old; + extent_buffer_get(c); + overwrite = 1; + } +init: + memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); + btrfs_set_header_level(c, level); + btrfs_set_header_bytenr(c, c->start); + btrfs_set_header_generation(c, trans->transid); + btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV); + btrfs_set_header_owner(c, root->root_key.objectid); + + write_extent_buffer(c, root->fs_info->fsid, + btrfs_header_fsid(), BTRFS_FSID_SIZE); + + write_extent_buffer(c, root->fs_info->chunk_tree_uuid, + btrfs_header_chunk_tree_uuid(c), + BTRFS_UUID_SIZE); + + btrfs_mark_buffer_dirty(c); + /* + * this case can happen in the following case: + * + * 1.overwrite previous root. + * + * 2.reinit reloc data root, this is because we skip pin + * down reloc data tree before which means we can allocate + * same block bytenr here. + */ + if (old->start == c->start) { + btrfs_set_root_generation(&root->root_item, + trans->transid); + root->root_item.level = btrfs_header_level(root->node); + ret = btrfs_update_root(trans, root->fs_info->tree_root, + &root->root_key, &root->root_item); + if (ret) { + free_extent_buffer(c); + return ret; + } + } + free_extent_buffer(old); + root->node = c; + add_root_to_dirty_list(root); + return 0; +} + +static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int tree_root) +{ + struct extent_buffer *tmp; + struct btrfs_root_item *ri; + struct btrfs_key key; + u64 bytenr; + u32 nodesize; + int level = btrfs_header_level(eb); + int nritems; + int ret; + int i; + + /* + * If we have pinned this block before, don't pin it again. + * This can not only avoid forever loop with broken filesystem + * but also give us some speedups. + */ + if (test_range_bit(&fs_info->pinned_extents, eb->start, + eb->start + eb->len - 1, EXTENT_DIRTY, 0)) + return 0; + + btrfs_pin_extent(fs_info, eb->start, eb->len); + + nodesize = btrfs_super_nodesize(fs_info->super_copy); + nritems = btrfs_header_nritems(eb); + for (i = 0; i < nritems; i++) { + if (level == 0) { + btrfs_item_key_to_cpu(eb, &key, i); + if (key.type != BTRFS_ROOT_ITEM_KEY) + continue; + /* Skip the extent root and reloc roots */ + if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID || key.objectid == BTRFS_TREE_RELOC_OBJECTID || key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) continue; @@ -9195,24 +10217,20 @@ static int pin_metadata_blocks(struct btrfs_fs_info *fs_info) static int reset_block_groups(struct btrfs_fs_info *fs_info) { struct btrfs_block_group_cache *cache; - struct btrfs_path *path; + struct btrfs_path path; struct extent_buffer *leaf; struct btrfs_chunk *chunk; struct btrfs_key key; int ret; u64 start; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - + btrfs_init_path(&path); key.objectid = 0; key.type = BTRFS_CHUNK_ITEM_KEY; key.offset = 0; - - ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0); if (ret < 0) { - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } @@ -9227,10 +10245,10 @@ static int reset_block_groups(struct btrfs_fs_info *fs_info) /* First we need to create the in-memory block groups */ while (1) { - if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { - ret = btrfs_next_leaf(fs_info->chunk_root, path); + if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) { + ret = btrfs_next_leaf(fs_info->chunk_root, &path); if (ret < 0) { - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } if (ret) { @@ -9238,15 +10256,14 @@ static int reset_block_groups(struct btrfs_fs_info *fs_info) break; } } - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + leaf = path.nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); if (key.type != BTRFS_CHUNK_ITEM_KEY) { - path->slots[0]++; + path.slots[0]++; continue; } - chunk = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_chunk); + chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk); btrfs_add_block_group(fs_info, 0, btrfs_chunk_type(leaf, chunk), key.objectid, key.offset, @@ -9254,7 +10271,7 @@ static int reset_block_groups(struct btrfs_fs_info *fs_info) set_extent_dirty(&fs_info->free_space_cache, key.offset, key.offset + btrfs_chunk_length(leaf, chunk), GFP_NOFS); - path->slots[0]++; + path.slots[0]++; } start = 0; while (1) { @@ -9265,7 +10282,7 @@ static int reset_block_groups(struct btrfs_fs_info *fs_info) start = cache->key.objectid + cache->key.offset; } - btrfs_free_path(path); + btrfs_release_path(&path); return 0; } @@ -9273,22 +10290,18 @@ static int reset_balance(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info) { struct btrfs_root *root = fs_info->tree_root; - struct btrfs_path *path; + struct btrfs_path path; struct extent_buffer *leaf; struct btrfs_key key; int del_slot, del_nr = 0; int ret; int found = 0; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - + btrfs_init_path(&path); key.objectid = BTRFS_BALANCE_OBJECTID; key.type = BTRFS_BALANCE_ITEM_KEY; key.offset = 0; - - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); if (ret) { if (ret > 0) ret = 0; @@ -9298,64 +10311,63 @@ static int reset_balance(struct btrfs_trans_handle *trans, goto out; } - ret = btrfs_del_item(trans, root, path); + ret = btrfs_del_item(trans, root, &path); if (ret) goto out; - btrfs_release_path(path); + btrfs_release_path(&path); key.objectid = BTRFS_TREE_RELOC_OBJECTID; key.type = BTRFS_ROOT_ITEM_KEY; key.offset = 0; - - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); if (ret < 0) goto out; while (1) { - if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) { if (!found) break; if (del_nr) { - ret = btrfs_del_items(trans, root, path, + ret = btrfs_del_items(trans, root, &path, del_slot, del_nr); del_nr = 0; if (ret) goto out; } key.offset++; - btrfs_release_path(path); + btrfs_release_path(&path); found = 0; - ret = btrfs_search_slot(trans, root, &key, path, + ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); if (ret < 0) goto out; continue; } found = 1; - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + leaf = path.nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); if (key.objectid > BTRFS_TREE_RELOC_OBJECTID) break; if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) { - path->slots[0]++; + path.slots[0]++; continue; } if (!del_nr) { - del_slot = path->slots[0]; + del_slot = path.slots[0]; del_nr = 1; } else { del_nr++; } - path->slots[0]++; + path.slots[0]++; } if (del_nr) { - ret = btrfs_del_items(trans, root, path, del_slot, del_nr); + ret = btrfs_del_items(trans, root, &path, del_slot, del_nr); if (ret) goto out; } - btrfs_release_path(path); + btrfs_release_path(&path); reinit_data_reloc: key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; @@ -9373,7 +10385,7 @@ reinit_data_reloc: goto out; ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID); out: - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } @@ -9463,7 +10475,7 @@ static int reinit_extent_tree(struct btrfs_trans_handle *trans, static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb) { - struct btrfs_path *path; + struct btrfs_path path; struct btrfs_trans_handle *trans; struct btrfs_key key; int ret; @@ -9480,31 +10492,26 @@ static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb return PTR_ERR(root); } - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - btrfs_free_path(path); + if (IS_ERR(trans)) return PTR_ERR(trans); - } - path->lowest_level = btrfs_header_level(eb); - if (path->lowest_level) + btrfs_init_path(&path); + path.lowest_level = btrfs_header_level(eb); + if (path.lowest_level) btrfs_node_key_to_cpu(eb, &key, 0); else btrfs_item_key_to_cpu(eb, &key, 0); - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + ret = btrfs_search_slot(trans, root, &key, &path, 0, 1); btrfs_commit_transaction(trans, root); - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad) { - struct btrfs_path *path; + struct btrfs_path path; struct btrfs_trans_handle *trans; struct btrfs_key key; int ret; @@ -9522,26 +10529,21 @@ static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad) return PTR_ERR(root); } - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - btrfs_free_path(path); + if (IS_ERR(trans)) return PTR_ERR(trans); - } - ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1); + btrfs_init_path(&path); + ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1); if (ret) { if (ret > 0) ret = 0; goto out; } - ret = btrfs_del_item(trans, root, path); + ret = btrfs_del_item(trans, root, &path); out: btrfs_commit_transaction(trans, root); - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } @@ -9588,7 +10590,7 @@ static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans, struct btrfs_root *csum_root, struct btrfs_root *cur_root) { - struct btrfs_path *path; + struct btrfs_path path; struct btrfs_key key; struct extent_buffer *node; struct btrfs_file_extent_item *fi; @@ -9598,30 +10600,25 @@ static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans, int slot = 0; int ret = 0; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; buf = malloc(cur_root->fs_info->csum_root->sectorsize); - if (!buf) { - ret = -ENOMEM; - goto out; - } + if (!buf) + return -ENOMEM; + btrfs_init_path(&path); key.objectid = 0; key.offset = 0; key.type = 0; - - ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0); if (ret < 0) goto out; /* Iterate all regular file extents and fill its csum */ while (1) { - btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); if (key.type != BTRFS_EXTENT_DATA_KEY) goto next; - node = path->nodes[0]; - slot = path->slots[0]; + node = path.nodes[0]; + slot = path.slots[0]; fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item); if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG) goto next; @@ -9638,7 +10635,7 @@ next: * TODO: if next leaf is corrupted, jump to nearest next valid * leaf. */ - ret = btrfs_next_item(cur_root, path); + ret = btrfs_next_item(cur_root, &path); if (ret < 0) goto out; if (ret > 0) { @@ -9648,7 +10645,7 @@ next: } out: - btrfs_free_path(path); + btrfs_release_path(&path); free(buf); return ret; } @@ -9657,7 +10654,7 @@ static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans, struct btrfs_root *csum_root) { struct btrfs_fs_info *fs_info = csum_root->fs_info; - struct btrfs_path *path; + struct btrfs_path path; struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *cur_root; struct extent_buffer *node; @@ -9665,15 +10662,11 @@ static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans, int slot = 0; int ret = 0; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - + btrfs_init_path(&path); key.objectid = BTRFS_FS_TREE_OBJECTID; key.offset = 0; key.type = BTRFS_ROOT_ITEM_KEY; - - ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0); if (ret < 0) goto out; if (ret > 0) { @@ -9682,8 +10675,8 @@ static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans, } while (1) { - node = path->nodes[0]; - slot = path->slots[0]; + node = path.nodes[0]; + slot = path.slots[0]; btrfs_item_key_to_cpu(node, &key, slot); if (key.objectid > BTRFS_LAST_FREE_OBJECTID) goto out; @@ -9704,7 +10697,7 @@ static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans, if (ret < 0) goto out; next: - ret = btrfs_next_item(tree_root, path); + ret = btrfs_next_item(tree_root, &path); if (ret > 0) { ret = 0; goto out; @@ -9714,7 +10707,7 @@ next: } out: - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } @@ -9722,36 +10715,32 @@ static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans, struct btrfs_root *csum_root) { struct btrfs_root *extent_root = csum_root->fs_info->extent_root; - struct btrfs_path *path; + struct btrfs_path path; struct btrfs_extent_item *ei; struct extent_buffer *leaf; char *buf; struct btrfs_key key; int ret; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - + btrfs_init_path(&path); key.objectid = 0; key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = 0; - - ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); if (ret < 0) { - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } buf = malloc(csum_root->sectorsize); if (!buf) { - btrfs_free_path(path); + btrfs_release_path(&path); return -ENOMEM; } while (1) { - if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { - ret = btrfs_next_leaf(extent_root, path); + if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) { + ret = btrfs_next_leaf(extent_root, &path); if (ret < 0) break; if (ret) { @@ -9759,19 +10748,19 @@ static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans, break; } } - leaf = path->nodes[0]; + leaf = path.nodes[0]; - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); if (key.type != BTRFS_EXTENT_ITEM_KEY) { - path->slots[0]++; + path.slots[0]++; continue; } - ei = btrfs_item_ptr(leaf, path->slots[0], + ei = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_extent_item); if (!(btrfs_extent_flags(leaf, ei) & BTRFS_EXTENT_FLAG_DATA)) { - path->slots[0]++; + path.slots[0]++; continue; } @@ -9779,10 +10768,10 @@ static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans, key.offset); if (ret) break; - path->slots[0]++; + path.slots[0]++; } - btrfs_free_path(path); + btrfs_release_path(&path); free(buf); return ret; } @@ -9830,7 +10819,7 @@ static int build_roots_info_cache(struct btrfs_fs_info *info) int ret = 0; struct btrfs_key key; struct extent_buffer *leaf; - struct btrfs_path *path; + struct btrfs_path path; if (!roots_info_cache) { roots_info_cache = malloc(sizeof(*roots_info_cache)); @@ -9839,24 +10828,20 @@ static int build_roots_info_cache(struct btrfs_fs_info *info) cache_tree_init(roots_info_cache); } - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - + btrfs_init_path(&path); key.objectid = 0; key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = 0; - - ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0); if (ret < 0) goto out; - leaf = path->nodes[0]; + leaf = path.nodes[0]; while (1) { struct btrfs_key found_key; struct btrfs_extent_item *ei; struct btrfs_extent_inline_ref *iref; - int slot = path->slots[0]; + int slot = path.slots[0]; int type; u64 flags; u64 root_id; @@ -9865,18 +10850,18 @@ static int build_roots_info_cache(struct btrfs_fs_info *info) struct root_item_info *rii; if (slot >= btrfs_header_nritems(leaf)) { - ret = btrfs_next_leaf(info->extent_root, path); + ret = btrfs_next_leaf(info->extent_root, &path); if (ret < 0) { break; } else if (ret) { ret = 0; break; } - leaf = path->nodes[0]; - slot = path->slots[0]; + leaf = path.nodes[0]; + slot = path.slots[0]; } - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]); if (found_key.type != BTRFS_EXTENT_ITEM_KEY && found_key.type != BTRFS_METADATA_ITEM_KEY) @@ -9939,11 +10924,11 @@ static int build_roots_info_cache(struct btrfs_fs_info *info) rii->node_count++; } next: - path->slots[0]++; + path.slots[0]++; } out: - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } @@ -10039,7 +11024,7 @@ static int maybe_repair_root_item(struct btrfs_fs_info *info, */ static int repair_root_items(struct btrfs_fs_info *info) { - struct btrfs_path *path = NULL; + struct btrfs_path path; struct btrfs_key key; struct extent_buffer *leaf; struct btrfs_trans_handle *trans = NULL; @@ -10047,16 +11032,12 @@ static int repair_root_items(struct btrfs_fs_info *info) int bad_roots = 0; int need_trans = 0; + btrfs_init_path(&path); + ret = build_roots_info_cache(info); if (ret) goto out; - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out; - } - key.objectid = BTRFS_FIRST_FREE_OBJECTID; key.type = BTRFS_ROOT_ITEM_KEY; key.offset = 0; @@ -10075,19 +11056,19 @@ again: } } - ret = btrfs_search_slot(trans, info->tree_root, &key, path, + ret = btrfs_search_slot(trans, info->tree_root, &key, &path, 0, trans ? 1 : 0); if (ret < 0) goto out; - leaf = path->nodes[0]; + leaf = path.nodes[0]; while (1) { struct btrfs_key found_key; - if (path->slots[0] >= btrfs_header_nritems(leaf)) { - int no_more_keys = find_next_key(path, &key); + if (path.slots[0] >= btrfs_header_nritems(leaf)) { + int no_more_keys = find_next_key(&path, &key); - btrfs_release_path(path); + btrfs_release_path(&path); if (trans) { ret = btrfs_commit_transaction(trans, info->tree_root); @@ -10101,14 +11082,14 @@ again: goto again; } - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]); if (found_key.type != BTRFS_ROOT_ITEM_KEY) goto next; if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID) goto next; - ret = maybe_repair_root_item(info, path, &found_key, + ret = maybe_repair_root_item(info, &path, &found_key, trans ? 0 : 1); if (ret < 0) goto out; @@ -10116,18 +11097,18 @@ again: if (!trans && repair) { need_trans = 1; key = found_key; - btrfs_release_path(path); + btrfs_release_path(&path); goto again; } bad_roots++; } next: - path->slots[0]++; + path.slots[0]++; } ret = 0; out: free_roots_info_cache(); - btrfs_free_path(path); + btrfs_release_path(&path); if (trans) btrfs_commit_transaction(trans, info->tree_root); if (ret < 0) @@ -10136,12 +11117,42 @@ out: return bad_roots; } +static int clear_free_space_cache(struct btrfs_fs_info *fs_info) +{ + struct btrfs_trans_handle *trans; + struct btrfs_block_group_cache *bg_cache; + u64 current = 0; + int ret = 0; + + /* Clear all free space cache inodes and its extent data */ + while (1) { + bg_cache = btrfs_lookup_first_block_group(fs_info, current); + if (!bg_cache) + break; + ret = btrfs_clear_free_space_cache(fs_info, bg_cache); + if (ret < 0) + return ret; + current = bg_cache->key.objectid + bg_cache->key.offset; + } + + /* Don't forget to set cache_generation to -1 */ + trans = btrfs_start_transaction(fs_info->tree_root, 0); + if (IS_ERR(trans)) { + error("failed to update super block cache generation"); + return PTR_ERR(trans); + } + btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1); + btrfs_commit_transaction(trans, fs_info->tree_root); + + return ret; +} + const char * const cmd_check_usage[] = { "btrfs check [options] ", "Check structural integrity of a filesystem (unmounted).", "Check structural integrity of an unmounted filesystem. Verify internal", "trees' consistency and item connectivity. In the repair mode try to", - "fix the problems found.", + "fix the problems found. ", "WARNING: the repair mode is considered dangerous", "", "-s|--super use this superblock copy", @@ -10150,13 +11161,21 @@ const char * const cmd_check_usage[] = { "--readonly run in read-only mode (default)", "--init-csum-tree create a new CRC tree", "--init-extent-tree create a new extent tree", + "--mode allows choice of memory/IO trade-offs", + " where MODE is one of:", + " original - read inodes and extents to memory (requires", + " more memory, does less IO)", + " lowmem - try to use less memory but read blocks again", + " when needed", "--check-data-csum verify checksums of data blocks", - "-Q|--qgroup-report print a report on qgroup consistency", + "-Q|--qgroup-report print a report on qgroup consistency", "-E|--subvol-extents ", " print subvolume extents and sharing state", "-r|--tree-root use the given bytenr for the tree root", "--chunk-root use the given bytenr for the chunk tree root", "-p|--progress indicate progress", + "--clear-space-cache v1|v2 clear space cache for v1 or v2", + " NOTE: v1 support implemented", NULL }; @@ -10174,15 +11193,17 @@ int cmd_check(int argc, char **argv) u64 num; int init_csum_tree = 0; int readonly = 0; + int clear_space_cache = 0; int qgroup_report = 0; int qgroups_repaired = 0; - enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE; + unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE; while(1) { int c; enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM, GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM, - GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE }; + GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE, + GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE }; static const struct option long_options[] = { { "super", required_argument, NULL, 's' }, { "repair", no_argument, NULL, GETOPT_VAL_REPAIR }, @@ -10200,6 +11221,10 @@ int cmd_check(int argc, char **argv) { "chunk-root", required_argument, NULL, GETOPT_VAL_CHUNK_TREE }, { "progress", no_argument, NULL, 'p' }, + { "mode", required_argument, NULL, + GETOPT_VAL_MODE }, + { "clear-space-cache", required_argument, NULL, + GETOPT_VAL_CLEAR_SPACE_CACHE}, { NULL, 0, NULL, 0} }; @@ -10214,8 +11239,8 @@ int cmd_check(int argc, char **argv) case 's': num = arg_strtou64(optarg); if (num >= BTRFS_SUPER_MIRROR_MAX) { - fprintf(stderr, - "ERROR: super mirror should be less than: %d\n", + error( + "super mirror should be less than %d", BTRFS_SUPER_MIRROR_MAX); exit(1); } @@ -10264,6 +11289,23 @@ int cmd_check(int argc, char **argv) case GETOPT_VAL_CHECK_CSUM: check_data_csum = 1; break; + case GETOPT_VAL_MODE: + check_mode = parse_check_mode(optarg); + if (check_mode == CHECK_MODE_UNKNOWN) { + error("unknown mode: %s", optarg); + exit(1); + } + break; + case GETOPT_VAL_CLEAR_SPACE_CACHE: + if (strcmp(optarg, "v1") != 0) { + error( + "only v1 support implmented, unrecognized value %s", + optarg); + exit(1); + } + clear_space_cache = 1; + ctree_flags |= OPEN_CTREE_WRITES; + break; } } @@ -10277,7 +11319,15 @@ int cmd_check(int argc, char **argv) /* This check is the only reason for --readonly to exist */ if (readonly && repair) { - fprintf(stderr, "Repair options are not compatible with --readonly\n"); + error("repair options are not compatible with --readonly"); + exit(1); + } + + /* + * Not supported yet + */ + if (repair && check_mode == CHECK_MODE_LOWMEM) { + error("low memory mode doesn't support repair yet"); exit(1); } @@ -10285,10 +11335,10 @@ int cmd_check(int argc, char **argv) cache_tree_init(&root_cache); if((ret = check_mounted(argv[optind])) < 0) { - fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret)); + error("could not check mount status: %s", strerror(-ret)); goto err_out; } else if(ret) { - fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]); + error("%s is currently mounted, aborting", argv[optind]); ret = -EBUSY; goto err_out; } @@ -10300,27 +11350,45 @@ int cmd_check(int argc, char **argv) info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr, chunk_root_bytenr, ctree_flags); if (!info) { - fprintf(stderr, "Couldn't open file system\n"); + error("cannot open file system"); ret = -EIO; goto err_out; } global_info = info; root = info->fs_root; + if (clear_space_cache) { + if (btrfs_fs_compat_ro(info, + BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) { + error( + "free space cache v2 detected, clearing not implemented"); + ret = 1; + goto close_out; + } + printf("Clearing free space cache\n"); + ret = clear_free_space_cache(info); + if (ret) { + error("failed to clear free space cache"); + ret = 1; + } else { + printf("Free space cache cleared\n"); + } + goto close_out; + } /* * repair mode will force us to commit transaction which * will make us fail to load log tree when mounting. */ if (repair && btrfs_super_log_root(info->super_copy)) { - ret = ask_user("repair mode will force to clear out log tree, Are you sure?"); + ret = ask_user("repair mode will force to clear out log tree, are you sure?"); if (!ret) { ret = 1; goto close_out; } ret = zero_log_tree(root); if (ret) { - fprintf(stderr, "fail to zero log tree\n"); + error("failed to zero log tree: %d", ret); goto close_out; } } @@ -10345,7 +11413,7 @@ int cmd_check(int argc, char **argv) if (!extent_buffer_uptodate(info->tree_root->node) || !extent_buffer_uptodate(info->dev_root->node) || !extent_buffer_uptodate(info->chunk_root->node)) { - fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n"); + error("critical roots corrupted, unable to check the filesystem"); ret = -EIO; goto close_out; } @@ -10355,7 +11423,7 @@ int cmd_check(int argc, char **argv) trans = btrfs_start_transaction(info->extent_root, 0); if (IS_ERR(trans)) { - fprintf(stderr, "Error starting transaction\n"); + error("error starting transaction"); ret = PTR_ERR(trans); goto close_out; } @@ -10368,10 +11436,11 @@ int cmd_check(int argc, char **argv) } if (init_csum_tree) { - fprintf(stderr, "Reinit crc root\n"); + printf("Reinitialize checksum tree\n"); ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0); if (ret) { - fprintf(stderr, "crc root initialization failed\n"); + error("checksum tree initialization failed: %d", + ret); ret = -EIO; goto close_out; } @@ -10379,7 +11448,7 @@ int cmd_check(int argc, char **argv) ret = fill_csum_tree(trans, info->csum_root, init_extent_tree); if (ret) { - fprintf(stderr, "crc refilling failed\n"); + error("checksum tree refilling failed: %d", ret); return -EIO; } } @@ -10392,21 +11461,24 @@ int cmd_check(int argc, char **argv) goto close_out; } if (!extent_buffer_uptodate(info->extent_root->node)) { - fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n"); + error("critical: extent_root, unable to check the filesystem"); ret = -EIO; goto close_out; } if (!extent_buffer_uptodate(info->csum_root->node)) { - fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n"); + error("critical: csum_root, unable to check the filesystem"); ret = -EIO; goto close_out; } if (!ctx.progress_enabled) - fprintf(stderr, "checking extents\n"); - ret = check_chunks_and_extents(root); + printf("checking extents"); + if (check_mode == CHECK_MODE_LOWMEM) + ret = check_chunks_and_extents_v2(root); + else + ret = check_chunks_and_extents(root); if (ret) - fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n"); + printf("Errors found in extent allocation tree or chunk allocation"); ret = repair_root_items(info); if (ret < 0) @@ -10492,7 +11564,7 @@ int cmd_check(int argc, char **argv) } if (!list_empty(&root->fs_info->recow_ebs)) { - fprintf(stderr, "Transid errors in file system\n"); + error("transid errors in file system"); ret = 1; } out: