X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=cmds-check.c;h=dd8926df1af3d2976141d1568fc322775eb0f147;hb=52bfe9ef78fdf6c1f1c5ceb3f7a8bf8bf2164775;hp=8e12610fc6f7d61f7cff6dd3730070f90dd3d41e;hpb=e1a5ecc206121b48b62150b94faf36d9362d675b;p=platform%2Fupstream%2Fbtrfs-progs.git diff --git a/cmds-check.c b/cmds-check.c index 8e12610..dd8926d 100644 --- a/cmds-check.c +++ b/cmds-check.c @@ -41,6 +41,7 @@ #include "rbtree-utils.h" #include "backref.h" #include "ulist.h" +#include "hash.h" enum task_position { TASK_EXTENTS, @@ -67,7 +68,6 @@ static u64 data_bytes_referenced = 0; static int found_old_backref = 0; static LIST_HEAD(duplicate_extents); static LIST_HEAD(delete_items); -static int repair = 0; static int no_holes = 0; static int init_extent_tree = 0; static int check_data_csum = 0; @@ -75,6 +75,15 @@ static struct btrfs_fs_info *global_info; static struct task_ctx ctx = { 0 }; static struct cache_tree *roots_info_cache = NULL; +enum btrfs_check_mode { + CHECK_MODE_ORIGINAL, + CHECK_MODE_LOWMEM, + CHECK_MODE_UNKNOWN, + CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL +}; + +static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT; + struct extent_backref { struct list_head list; unsigned int is_data:1; @@ -84,6 +93,11 @@ struct extent_backref { unsigned int broken:1; }; +static inline struct extent_backref* to_extent_backref(struct list_head *entry) +{ + return list_entry(entry, struct extent_backref, list); +} + struct data_backref { struct extent_backref node; union { @@ -99,6 +113,11 @@ struct data_backref { u32 found_ref; }; +static inline struct data_backref* to_data_backref(struct extent_backref *back) +{ + return container_of(back, struct data_backref, node); +} + /* * Much like data_backref, just removed the undetermined members * and change it to use list_head. 
@@ -122,6 +141,14 @@ struct tree_backref { }; }; +static inline struct tree_backref* to_tree_backref(struct extent_backref *back) +{ + return container_of(back, struct tree_backref, node); +} + +/* Explicit initialization for extent_record::flag_block_full_backref */ +enum { FLAG_UNSET = 2 }; + struct extent_record { struct list_head backrefs; struct list_head dups; @@ -138,7 +165,7 @@ struct extent_record { u64 info_objectid; u32 num_duplicates; u8 info_level; - int flag_block_full_backref; + unsigned int flag_block_full_backref:2; unsigned int found_rec:1; unsigned int content_checked:1; unsigned int owner_ref_checked:1; @@ -149,20 +176,30 @@ struct extent_record { unsigned int wrong_chunk_type:1; }; +static inline struct extent_record* to_extent_record(struct list_head *entry) +{ + return container_of(entry, struct extent_record, list); +} + struct inode_backref { struct list_head list; unsigned int found_dir_item:1; unsigned int found_dir_index:1; unsigned int found_inode_ref:1; - unsigned int filetype:8; + u8 filetype; + u8 ref_type; int errors; - unsigned int ref_type; u64 dir; u64 index; u16 namelen; char name[0]; }; +static inline struct inode_backref* to_inode_backref(struct list_head *entry) +{ + return list_entry(entry, struct inode_backref, list); +} + struct root_item_record { struct list_head list; u64 objectid; @@ -253,6 +290,11 @@ struct root_backref { char name[0]; }; +static inline struct root_backref* to_root_backref(struct list_head *entry) +{ + return list_entry(entry, struct root_backref, list); +} + struct root_record { struct list_head backrefs; struct cache_extent cache; @@ -310,6 +352,23 @@ struct root_item_info { struct cache_extent cache_extent; }; +/* + * Error bit for low memory mode check. + * + * Currently no caller cares about it yet. Just internal use for error + * classification. 
+ */ +#define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */ +#define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */ +#define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */ +#define REFERENCER_MISSING (1 << 3) /* Referencer not found */ +#define REFERENCER_MISMATCH (1 << 4) /* Referencer found but does not match */ +#define ITEM_SIZE_MISMATCH (1 << 5) /* Bad item size */ +#define UNKNOWN_TYPE (1 << 6) /* Unknown type */ +#define ACCOUNTING_MISMATCH (1 << 7) /* Used space accounting error */ +#define CHUNK_TYPE_MISMATCH (1 << 8) +/* Own bit: must not alias REFERENCER_MISMATCH, or the two cannot be told apart in an error mask */ +#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */ + static void *print_status_check(void *p) { struct task_ctx *priv = p; @@ -344,6 +403,18 @@ static int print_status_return(void *p) return 0; } +static enum btrfs_check_mode parse_check_mode(const char *str) +{ + if (strcmp(str, "lowmem") == 0) + return CHECK_MODE_LOWMEM; + if (strcmp(str, "orig") == 0) + return CHECK_MODE_ORIGINAL; + if (strcmp(str, "original") == 0) + return CHECK_MODE_ORIGINAL; + + return CHECK_MODE_UNKNOWN; +} + /* Compatible function to allow reuse of old codes */ static u64 first_extent_gap(struct rb_root *holes) { @@ -476,7 +547,7 @@ static int del_file_extent_hole(struct rb_root *holes, return -EEXIST; /* - * Now there will be no overflap, delete the hole and re-add the + * Now there will be no overlap, delete the hole and re-add the * split(s) if they exists. 
*/ if (start > hole->start) { @@ -589,6 +660,7 @@ static struct inode_record *clone_inode_rec(struct inode_record *orig_rec) struct inode_backref *tmp; struct orphan_data_extent *src_orphan; struct orphan_data_extent *dst_orphan; + struct rb_node *rb; size_t size; int ret; @@ -621,10 +693,21 @@ static struct inode_record *clone_inode_rec(struct inode_record *orig_rec) list_add_tail(&dst_orphan->list, &rec->orphan_extents); } ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes); - BUG_ON(ret < 0); + if (ret < 0) + goto cleanup_rb; return rec; +cleanup_rb: + rb = rb_first(&rec->holes); + while (rb) { + struct file_extent_hole *hole; + + hole = rb_entry(rb, struct file_extent_hole, node); + rb = rb_next(rb); + free(hole); + } + cleanup: if (!list_empty(&rec->backrefs)) list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) { @@ -746,9 +829,9 @@ static void print_ref_error(int errors) if (errors & REF_ERR_DUP_INODE_REF) fprintf(stderr, ", dup inode ref"); if (errors & REF_ERR_INDEX_UNMATCH) - fprintf(stderr, ", index unmatch"); + fprintf(stderr, ", index mismatch"); if (errors & REF_ERR_FILETYPE_UNMATCH) - fprintf(stderr, ", filetype unmatch"); + fprintf(stderr, ", filetype mismatch"); if (errors & REF_ERR_NAME_TOO_LONG) fprintf(stderr, ", name too long"); if (errors & REF_ERR_NO_ROOT_REF) @@ -831,8 +914,7 @@ static void free_inode_rec(struct inode_record *rec) return; while (!list_empty(&rec->backrefs)) { - backref = list_entry(rec->backrefs.next, - struct inode_backref, list); + backref = to_inode_backref(rec->backrefs.next); list_del(&backref->list); free(backref); } @@ -855,7 +937,7 @@ static void maybe_free_inode_rec(struct cache_tree *inode_cache, struct cache_extent *cache; struct inode_backref *tmp, *backref; struct ptr_node *node; - unsigned char filetype; + u8 filetype; if (!rec->found_inode_item) return; @@ -986,7 +1068,7 @@ static struct inode_backref *get_inode_backref(struct inode_record *rec, static int add_inode_backref(struct cache_tree 
*inode_cache, u64 ino, u64 dir, u64 index, const char *name, int namelen, - int filetype, int itemtype, int errors) + u8 filetype, u8 itemtype, int errors) { struct inode_record *rec; struct inode_backref *backref; @@ -1389,7 +1471,7 @@ static int process_dir_item(struct btrfs_root *root, u32 data_len; int error; int nritems = 0; - int filetype; + u8 filetype; struct btrfs_dir_item *di; struct inode_record *rec; struct cache_tree *root_cache; @@ -1830,8 +1912,14 @@ static int check_child_node(struct btrfs_root *root, return ret; } +struct node_refs { + u64 bytenr[BTRFS_MAX_LEVEL]; + u64 refs[BTRFS_MAX_LEVEL]; +}; + static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, - struct walk_control *wc, int *level) + struct walk_control *wc, int *level, + struct node_refs *nrefs) { enum btrfs_tree_block_status status; u64 bytenr; @@ -1844,12 +1932,20 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); - ret = btrfs_lookup_extent_info(NULL, root, + + if (path->nodes[*level]->start == nrefs->bytenr[*level]) { + refs = nrefs->refs[*level]; + ret = 0; + } else { + ret = btrfs_lookup_extent_info(NULL, root, path->nodes[*level]->start, *level, 1, &refs, NULL); - if (ret < 0) { - err = ret; - goto out; + if (ret < 0) { + err = ret; + goto out; + } + nrefs->bytenr[*level] = path->nodes[*level]->start; + nrefs->refs[*level] = refs; } if (refs > 1) { @@ -1880,10 +1976,19 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, bytenr = btrfs_node_blockptr(cur, path->slots[*level]); ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); blocksize = root->nodesize; - ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1, - 1, &refs, NULL); - if (ret < 0) - refs = 0; + + if (bytenr == nrefs->bytenr[*level - 1]) { + refs = nrefs->refs[*level - 1]; + } else { + ret = btrfs_lookup_extent_info(NULL, root, bytenr, + *level - 1, 1, &refs, NULL); + if 
(ret < 0) { + refs = 0; + } else { + nrefs->bytenr[*level - 1] = bytenr; + nrefs->refs[*level - 1] = refs; + } + } if (refs > 1) { ret = enter_shared_node(root, bytenr, refs, @@ -1976,7 +2081,7 @@ static int check_root_dir(struct inode_record *rec) goto out; if (list_empty(&rec->backrefs)) goto out; - backref = list_entry(rec->backrefs.next, struct inode_backref, list); + backref = to_inode_backref(rec->backrefs.next); if (!backref->found_inode_ref) goto out; if (backref->index != 0 || backref->namelen != 2 || @@ -2082,7 +2187,7 @@ static int add_missing_dir_index(struct btrfs_root *root, struct inode_record *rec, struct inode_backref *backref) { - struct btrfs_path *path; + struct btrfs_path path; struct btrfs_trans_handle *trans; struct btrfs_dir_item *dir_item; struct extent_buffer *leaf; @@ -2093,27 +2198,22 @@ static int add_missing_dir_index(struct btrfs_root *root, u32 data_size = sizeof(*dir_item) + backref->namelen; int ret; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - btrfs_free_path(path); + if (IS_ERR(trans)) return PTR_ERR(trans); - } fprintf(stderr, "repairing missing dir index item for inode %llu\n", (unsigned long long)rec->ino); + + btrfs_init_path(&path); key.objectid = backref->dir; key.type = BTRFS_DIR_INDEX_KEY; key.offset = backref->index; - - ret = btrfs_insert_empty_item(trans, root, path, &key, data_size); + ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size); BUG_ON(ret); - leaf = path->nodes[0]; - dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item); + leaf = path.nodes[0]; + dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item); disk_key.objectid = cpu_to_le64(rec->ino); disk_key.type = BTRFS_INODE_ITEM_KEY; @@ -2126,7 +2226,7 @@ static int add_missing_dir_index(struct btrfs_root *root, name_ptr = (unsigned long)(dir_item + 1); write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen); 
btrfs_mark_buffer_dirty(leaf); - btrfs_free_path(path); + btrfs_release_path(&path); btrfs_commit_transaction(trans, root); backref->found_dir_index = 1; @@ -2151,31 +2251,25 @@ static int delete_dir_index(struct btrfs_root *root, { struct btrfs_trans_handle *trans; struct btrfs_dir_item *di; - struct btrfs_path *path; + struct btrfs_path path; int ret = 0; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - btrfs_free_path(path); + if (IS_ERR(trans)) return PTR_ERR(trans); - } - fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n", (unsigned long long)backref->dir, BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index, (unsigned long long)root->objectid); - di = btrfs_lookup_dir_index(trans, root, path, backref->dir, + btrfs_init_path(&path); + di = btrfs_lookup_dir_index(trans, root, &path, backref->dir, backref->name, backref->namelen, backref->index, -1); if (IS_ERR(di)) { ret = PTR_ERR(di); - btrfs_free_path(path); + btrfs_release_path(&path); btrfs_commit_transaction(trans, root); if (ret == -ENOENT) return 0; @@ -2183,11 +2277,11 @@ static int delete_dir_index(struct btrfs_root *root, } if (!di) - ret = btrfs_del_item(trans, root, path); + ret = btrfs_del_item(trans, root, &path); else - ret = btrfs_delete_one_dir_name(trans, root, path, di); + ret = btrfs_delete_one_dir_name(trans, root, &path, di); BUG_ON(ret); - btrfs_free_path(path); + btrfs_release_path(&path); btrfs_commit_transaction(trans, root); return ret; } @@ -2590,48 +2684,46 @@ out: */ static int find_normal_file_extent(struct btrfs_root *root, u64 ino) { - struct btrfs_path *path; + struct btrfs_path path; struct btrfs_key key; struct btrfs_key found_key; struct btrfs_file_extent_item *fi; u8 type; int ret = 0; - path = btrfs_alloc_path(); - if (!path) - goto out; + btrfs_init_path(&path); key.objectid = ino; key.type = BTRFS_EXTENT_DATA_KEY; key.offset = 0; - ret = btrfs_search_slot(NULL, root, &key, 
path, 0, 0); + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); if (ret < 0) { ret = 0; goto out; } - if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { - ret = btrfs_next_leaf(root, path); + if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) { + ret = btrfs_next_leaf(root, &path); if (ret) { ret = 0; goto out; } } while (1) { - btrfs_item_key_to_cpu(path->nodes[0], &found_key, - path->slots[0]); + btrfs_item_key_to_cpu(path.nodes[0], &found_key, + path.slots[0]); if (found_key.objectid != ino || found_key.type != BTRFS_EXTENT_DATA_KEY) break; - fi = btrfs_item_ptr(path->nodes[0], path->slots[0], + fi = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_file_extent_item); - type = btrfs_file_extent_type(path->nodes[0], fi); + type = btrfs_file_extent_type(path.nodes[0], fi); if (type != BTRFS_FILE_EXTENT_INLINE) { ret = 1; goto out; } } out: - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } @@ -2689,7 +2781,7 @@ static int repair_inode_no_item(struct btrfs_trans_handle *trans, type_recovered = 1; filetype = BTRFS_FT_REG_FILE; } else{ - printf("Can't determint the filetype for inode %llu, assume it is a normal file\n", + printf("Can't determine the filetype for inode %llu, assume it is a normal file\n", rec->ino); type_recovered = 1; filetype = BTRFS_FT_REG_FILE; @@ -2822,7 +2914,7 @@ out: static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec) { struct btrfs_trans_handle *trans; - struct btrfs_path *path; + struct btrfs_path path; int ret = 0; if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG | @@ -2834,10 +2926,6 @@ static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec) I_ERR_FILE_NBYTES_WRONG))) return rec->errors; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - /* * For nlink repair, it may create a dir and add link, so * 2 for parent(256)'s dir_index and dir_item @@ -2846,27 +2934,26 @@ static int try_repair_inode(struct btrfs_root *root, struct 
inode_record *rec) * 2 for lost+found dir's dir_index and dir_item for the file */ trans = btrfs_start_transaction(root, 7); - if (IS_ERR(trans)) { - btrfs_free_path(path); + if (IS_ERR(trans)) return PTR_ERR(trans); - } + btrfs_init_path(&path); if (rec->errors & I_ERR_NO_INODE_ITEM) - ret = repair_inode_no_item(trans, root, path, rec); + ret = repair_inode_no_item(trans, root, &path, rec); if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN) - ret = repair_inode_orphan_extent(trans, root, path, rec); + ret = repair_inode_orphan_extent(trans, root, &path, rec); if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT) - ret = repair_inode_discount_extent(trans, root, path, rec); + ret = repair_inode_discount_extent(trans, root, &path, rec); if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG) - ret = repair_inode_isize(trans, root, path, rec); + ret = repair_inode_isize(trans, root, &path, rec); if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM) - ret = repair_inode_orphan_item(trans, root, path, rec); + ret = repair_inode_orphan_item(trans, root, &path, rec); if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG) - ret = repair_inode_nlinks(trans, root, path, rec); + ret = repair_inode_nlinks(trans, root, &path, rec); if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG) - ret = repair_inode_nbytes(trans, root, path, rec); + ret = repair_inode_nbytes(trans, root, &path, rec); btrfs_commit_transaction(trans, root); - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } @@ -2892,7 +2979,7 @@ static int check_inode_recs(struct btrfs_root *root, /* * We need to record the highest inode number for later 'lost+found' * dir creation. - * We must select a ino not used/refered by any existing inode, or + * We must select an ino not used/referred by any existing inode, or * 'lost+found' ino may be a missing ino in a corrupted leaf, * this may cause 'lost+found' dir has wrong nlinks. 
*/ @@ -3113,13 +3200,12 @@ static void free_root_record(struct cache_extent *cache) rec = container_of(cache, struct root_record, cache); while (!list_empty(&rec->backrefs)) { - backref = list_entry(rec->backrefs.next, - struct root_backref, list); + backref = to_root_backref(rec->backrefs.next); list_del(&backref->list); free(backref); } - kfree(rec); + free(rec); } FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record); @@ -3400,7 +3486,7 @@ static int repair_btree(struct btrfs_root *root, struct cache_tree *corrupt_blocks) { struct btrfs_trans_handle *trans; - struct btrfs_path *path; + struct btrfs_path path; struct btrfs_corrupt_block *corrupt; struct cache_extent *cache; struct btrfs_key key; @@ -3411,23 +3497,20 @@ static int repair_btree(struct btrfs_root *root, if (cache_tree_empty(corrupt_blocks)) return 0; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { ret = PTR_ERR(trans); fprintf(stderr, "Error starting transaction: %s\n", strerror(-ret)); - goto out_free_path; + return ret; } + btrfs_init_path(&path); cache = first_cache_extent(corrupt_blocks); while (cache) { corrupt = container_of(cache, struct btrfs_corrupt_block, cache); level = corrupt->level; - path->lowest_level = level; + path.lowest_level = level; key.objectid = corrupt->key.objectid; key.type = corrupt->key.type; key.offset = corrupt->key.offset; @@ -3438,22 +3521,22 @@ static int repair_btree(struct btrfs_root *root, * so ins_len set to 0 here. * Balance will be done after all corrupt node/leaf is deleted. 
*/ - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + ret = btrfs_search_slot(trans, root, &key, &path, 0, 1); if (ret < 0) goto out; - offset = btrfs_node_blockptr(path->nodes[level], - path->slots[level]); + offset = btrfs_node_blockptr(path.nodes[level], + path.slots[level]); /* Remove the ptr */ - ret = btrfs_del_ptr(trans, root, path, level, - path->slots[level]); + ret = btrfs_del_ptr(trans, root, &path, level, + path.slots[level]); if (ret < 0) goto out; /* * Remove the corresponding extent * return value is not concerned. */ - btrfs_release_path(path); + btrfs_release_path(&path); ret = btrfs_free_extent(trans, root, offset, root->nodesize, 0, root->root_key.objectid, level - 1, 0); @@ -3466,18 +3549,17 @@ static int repair_btree(struct btrfs_root *root, corrupt = container_of(cache, struct btrfs_corrupt_block, cache); memcpy(&key, &corrupt->key, sizeof(key)); - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); if (ret < 0) goto out; /* return will always >0 since it won't find the item */ ret = 0; - btrfs_release_path(path); + btrfs_release_path(&path); cache = next_cache_extent(cache); } out: btrfs_commit_transaction(trans, root); -out_free_path: - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } @@ -3497,6 +3579,7 @@ static int check_fs_root(struct btrfs_root *root, struct orphan_data_extent *orphan; struct orphan_data_extent *tmp; enum btrfs_tree_block_status status; + struct node_refs nrefs; /* * Reuse the corrupt_block cache tree to record corrupted tree block @@ -3518,6 +3601,7 @@ static int check_fs_root(struct btrfs_root *root, memset(&root_node, 0, sizeof(root_node)); cache_tree_init(&root_node.root_cache); cache_tree_init(&root_node.inode_cache); + memset(&nrefs, 0, sizeof(nrefs)); /* Move the orphan extent record to corresponding inode_record */ list_for_each_entry_safe(orphan, tmp, @@ -3557,6 +3641,11 @@ static int check_fs_root(struct btrfs_root *root, 
btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); level = root_item->drop_level; path.lowest_level = level; + if (level > btrfs_header_level(root->node) || + level >= BTRFS_MAX_LEVEL) { + error("ignoring invalid drop level: %u", level); + goto skip_walking; + } wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); if (wret < 0) goto skip_walking; @@ -3567,7 +3656,7 @@ static int check_fs_root(struct btrfs_root *root, } while (1) { - wret = walk_down_tree(root, &path, wc, &level); + wret = walk_down_tree(root, &path, wc, &level, &nrefs); if (wret < 0) ret = wret; if (wret != 0) @@ -3738,4641 +3827,6800 @@ out: return err; } -static int all_backpointers_checked(struct extent_record *rec, int print_errs) +#define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */ +#define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */ +#define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */ +#define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */ +#define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */ +#define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */ + +/* + * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified + * INODE_REF/INODE_EXTREF match. + * + * @root: the root of the fs/file tree + * @ref_key: the key of the INODE_REF/INODE_EXTREF + * @key: the key of the DIR_ITEM/DIR_INDEX + * @index: the index in the INODE_REF/INODE_EXTREF, be used to + * distinguish root_dir between normal dir/file + * @name: the name in the INODE_REF/INODE_EXTREF + * @namelen: the length of name in the INODE_REF/INODE_EXTREF + * @mode: the st_mode of INODE_ITEM + * + * Return 0 if no error occurred. + * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir. + * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal + * dir/file. + * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX + * not match for normal dir/file. 
+ */ +static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key, + struct btrfs_key *key, u64 index, char *name, + u32 namelen, u32 mode) { - struct list_head *cur = rec->backrefs.next; - struct extent_backref *back; - struct tree_backref *tback; - struct data_backref *dback; - u64 found = 0; - int err = 0; + struct btrfs_path path; + struct extent_buffer *node; + struct btrfs_dir_item *di; + struct btrfs_key location; + char namebuf[BTRFS_NAME_LEN] = {0}; + u32 total; + u32 cur = 0; + u32 len; + u32 name_len; + u32 data_len; + u8 filetype; + int slot; + int ret; - while(cur != &rec->backrefs) { - back = list_entry(cur, struct extent_backref, list); - cur = cur->next; - if (!back->found_extent_tree) { - err = 1; - if (!print_errs) - goto out; - if (back->is_data) { - dback = (struct data_backref *)back; - fprintf(stderr, "Backref %llu %s %llu" - " owner %llu offset %llu num_refs %lu" - " not found in extent tree\n", - (unsigned long long)rec->start, - back->full_backref ? - "parent" : "root", - back->full_backref ? - (unsigned long long)dback->parent: - (unsigned long long)dback->root, - (unsigned long long)dback->owner, - (unsigned long long)dback->offset, - (unsigned long)dback->num_refs); - } else { - tback = (struct tree_backref *)back; - fprintf(stderr, "Backref %llu parent %llu" - " root %llu not found in extent tree\n", - (unsigned long long)rec->start, - (unsigned long long)tback->parent, - (unsigned long long)tback->root); - } - } - if (!back->is_data && !back->found_ref) { - err = 1; - if (!print_errs) - goto out; - tback = (struct tree_backref *)back; - fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n", - (unsigned long long)rec->start, - back->full_backref ? "parent" : "root", - back->full_backref ? 
- (unsigned long long)tback->parent : - (unsigned long long)tback->root, back); - } - if (back->is_data) { - dback = (struct data_backref *)back; - if (dback->found_ref != dback->num_refs) { - err = 1; - if (!print_errs) - goto out; - fprintf(stderr, "Incorrect local backref count" - " on %llu %s %llu owner %llu" - " offset %llu found %u wanted %u back %p\n", - (unsigned long long)rec->start, - back->full_backref ? - "parent" : "root", - back->full_backref ? - (unsigned long long)dback->parent: - (unsigned long long)dback->root, - (unsigned long long)dback->owner, - (unsigned long long)dback->offset, - dback->found_ref, dback->num_refs, back); - } - if (dback->disk_bytenr != rec->start) { - err = 1; - if (!print_errs) - goto out; - fprintf(stderr, "Backref disk bytenr does not" - " match extent record, bytenr=%llu, " - "ref bytenr=%llu\n", - (unsigned long long)rec->start, - (unsigned long long)dback->disk_bytenr); - } + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, root, key, &path, 0, 0); + if (ret < 0) { + ret = DIR_ITEM_MISSING; + goto out; + } - if (dback->bytes != rec->nr) { - err = 1; - if (!print_errs) - goto out; - fprintf(stderr, "Backref bytes do not match " - "extent backref, bytenr=%llu, ref " - "bytes=%llu, backref bytes=%llu\n", - (unsigned long long)rec->start, - (unsigned long long)rec->nr, - (unsigned long long)dback->bytes); - } - } - if (!back->is_data) { - found += 1; + /* Process root dir and goto out*/ + if (index == 0) { + if (ret == 0) { + ret = ROOT_DIR_ERROR; + error( + "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s", + root->objectid, + ref_key->type == BTRFS_INODE_REF_KEY ? + "REF" : "EXTREF", + ref_key->objectid, ref_key->offset, + key->type == BTRFS_DIR_ITEM_KEY ? 
+ "DIR_ITEM" : "DIR_INDEX"); } else { - dback = (struct data_backref *)back; - found += dback->found_ref; + ret = 0; } + + goto out; } - if (found != rec->refs) { - err = 1; - if (!print_errs) - goto out; - fprintf(stderr, "Incorrect global backref count " - "on %llu found %llu wanted %llu\n", - (unsigned long long)rec->start, - (unsigned long long)found, - (unsigned long long)rec->refs); - } -out: - return err; -} -static int free_all_extent_backrefs(struct extent_record *rec) -{ - struct extent_backref *back; - struct list_head *cur; - while (!list_empty(&rec->backrefs)) { - cur = rec->backrefs.next; - back = list_entry(cur, struct extent_backref, list); - list_del(cur); - free(back); + /* Process normal file/dir */ + if (ret > 0) { + ret = DIR_ITEM_MISSING; + error( + "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d", + root->objectid, + ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF", + ref_key->objectid, ref_key->offset, + key->type == BTRFS_DIR_ITEM_KEY ? 
+ "DIR_ITEM" : "DIR_INDEX", + key->objectid, key->offset, namelen, name, + imode_to_type(mode)); + goto out; } - return 0; -} -static void free_extent_record_cache(struct btrfs_fs_info *fs_info, - struct cache_tree *extent_cache) -{ - struct cache_extent *cache; - struct extent_record *rec; + /* Check whether inode_id/filetype/name match */ + node = path.nodes[0]; + slot = path.slots[0]; + di = btrfs_item_ptr(node, slot, struct btrfs_dir_item); + total = btrfs_item_size_nr(node, slot); + while (cur < total) { + ret = DIR_ITEM_MISMATCH; + name_len = btrfs_dir_name_len(node, di); + data_len = btrfs_dir_data_len(node, di); + + btrfs_dir_item_key_to_cpu(node, di, &location); + if (location.objectid != ref_key->objectid || + location.type != BTRFS_INODE_ITEM_KEY || + location.offset != 0) + goto next; - while (1) { - cache = first_cache_extent(extent_cache); - if (!cache) - break; - rec = container_of(cache, struct extent_record, cache); - remove_cache_extent(extent_cache, cache); - free_all_extent_backrefs(rec); - free(rec); + filetype = btrfs_dir_type(node, di); + if (imode_to_type(mode) != filetype) + goto next; + + if (name_len <= BTRFS_NAME_LEN) { + len = name_len; + } else { + len = BTRFS_NAME_LEN; + warning("root %llu %s[%llu %llu] name too long %u, trimmed", + root->objectid, + key->type == BTRFS_DIR_ITEM_KEY ? + "DIR_ITEM" : "DIR_INDEX", + key->objectid, key->offset, name_len); + } + read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len); + if (len != namelen || strncmp(namebuf, name, len)) + goto next; + + ret = 0; + goto out; +next: + len = sizeof(*di) + name_len + data_len; + di = (struct btrfs_dir_item *)((char *)di + len); + cur += len; } + if (ret == DIR_ITEM_MISMATCH) + error( + "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d", + root->objectid, + ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF", + ref_key->objectid, ref_key->offset, + key->type == BTRFS_DIR_ITEM_KEY ? 
+ "DIR_ITEM" : "DIR_INDEX", + key->objectid, key->offset, namelen, name, + imode_to_type(mode)); +out: + btrfs_release_path(&path); + return ret; } -static int maybe_free_extent_rec(struct cache_tree *extent_cache, - struct extent_record *rec) +/* + * Traverse the given INODE_REF and call find_dir_item() to find related + * DIR_ITEM/DIR_INDEX. + * + * @root: the root of the fs/file tree + * @ref_key: the key of the INODE_REF + * @refs: the count of INODE_REF + * @mode: the st_mode of INODE_ITEM + * + * Return 0 if no error occurred. + */ +static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key, + struct extent_buffer *node, int slot, u64 *refs, + int mode) { - if (rec->content_checked && rec->owner_ref_checked && - rec->extent_item_refs == rec->refs && rec->refs > 0 && - rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) && - !rec->bad_full_backref && !rec->crossing_stripes && - !rec->wrong_chunk_type) { - remove_cache_extent(extent_cache, &rec->cache); - free_all_extent_backrefs(rec); - list_del_init(&rec->list); - free(rec); + struct btrfs_key key; + struct btrfs_inode_ref *ref; + /* One spare byte so a BTRFS_NAME_LEN-sized name can still be NUL-terminated */ + char namebuf[BTRFS_NAME_LEN + 1] = {0}; + u32 total; + u32 cur = 0; + u32 len; + u32 name_len; + u64 index; + int ret, err = 0; + + ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref); + total = btrfs_item_size_nr(node, slot); + +next: + /* Update inode ref count */ + (*refs)++; + + index = btrfs_inode_ref_index(node, ref); + name_len = btrfs_inode_ref_name_len(node, ref); + if (name_len <= BTRFS_NAME_LEN) { + len = name_len; + } else { + len = BTRFS_NAME_LEN; + warning("root %llu INODE_REF[%llu %llu] name too long", + root->objectid, ref_key->objectid, ref_key->offset); } - return 0; + + read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len); + /* namebuf is reused for every ref in the item and printed with %s: terminate it */ + namebuf[len] = '\0'; + + /* Check root dir ref name: it must be exactly ".." (a bare prefix compare would accept "." or "") */ + if (index == 0 && (name_len != 2 || strncmp(namebuf, "..", 2))) { + error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s", + root->objectid, ref_key->objectid, 
ref_key->offset, + namebuf); + err |= ROOT_DIR_ERROR; + } + + /* Find related DIR_INDEX */ + key.objectid = ref_key->offset; + key.type = BTRFS_DIR_INDEX_KEY; + key.offset = index; + ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode); + err |= ret; + + /* Find related dir_item */ + key.objectid = ref_key->offset; + key.type = BTRFS_DIR_ITEM_KEY; + key.offset = btrfs_name_hash(namebuf, len); + ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode); + err |= ret; + + len = sizeof(*ref) + name_len; + ref = (struct btrfs_inode_ref *)((char *)ref + len); + cur += len; + if (cur < total) + goto next; + + return err; } -static int check_owner_ref(struct btrfs_root *root, - struct extent_record *rec, - struct extent_buffer *buf) +/* + * Traverse the given INODE_EXTREF and call find_dir_item() to find related + * DIR_ITEM/DIR_INDEX. + * + * @root: the root of the fs/file tree + * @ref_key: the key of the INODE_EXTREF + * @refs: the count of INODE_EXTREF + * @mode: the st_mode of INODE_ITEM + * + * Return 0 if no error occurred. 
+ */ +static int check_inode_extref(struct btrfs_root *root, + struct btrfs_key *ref_key, + struct extent_buffer *node, int slot, u64 *refs, + int mode) { - struct extent_backref *node; - struct tree_backref *back; - struct btrfs_root *ref_root; struct btrfs_key key; - struct btrfs_path path; - struct extent_buffer *parent; - int level; - int found = 0; + struct btrfs_inode_extref *extref; + char namebuf[BTRFS_NAME_LEN] = {0}; + u32 total; + u32 cur = 0; + u32 len; + u32 name_len; + u64 index; + u64 parent; int ret; + int err = 0; - list_for_each_entry(node, &rec->backrefs, list) { - if (node->is_data) - continue; - if (!node->found_ref) - continue; - if (node->full_backref) - continue; - back = (struct tree_backref *)node; - if (btrfs_header_owner(buf) == back->root) - return 0; + extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref); + total = btrfs_item_size_nr(node, slot); + +next: + /* update inode ref count */ + (*refs)++; + name_len = btrfs_inode_extref_name_len(node, extref); + index = btrfs_inode_extref_index(node, extref); + parent = btrfs_inode_extref_parent(node, extref); + if (name_len <= BTRFS_NAME_LEN) { + len = name_len; + } else { + len = BTRFS_NAME_LEN; + warning("root %llu INODE_EXTREF[%llu %llu] name too long", + root->objectid, ref_key->objectid, ref_key->offset); } - BUG_ON(rec->is_root); + read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len); - /* try to find the block by search corresponding fs tree */ - key.objectid = btrfs_header_owner(buf); - key.type = BTRFS_ROOT_ITEM_KEY; - key.offset = (u64)-1; + /* Check root dir ref name */ + if (index == 0 && strncmp(namebuf, "..", name_len)) { + error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s", + root->objectid, ref_key->objectid, ref_key->offset, + namebuf); + err |= ROOT_DIR_ERROR; + } - ref_root = btrfs_read_fs_root(root->fs_info, &key); - if (IS_ERR(ref_root)) - return 1; + /* find related dir_index */ + key.objectid = parent; + key.type = 
BTRFS_DIR_INDEX_KEY; + key.offset = index; + ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode); + err |= ret; - level = btrfs_header_level(buf); - if (level == 0) - btrfs_item_key_to_cpu(buf, &key, 0); - else - btrfs_node_key_to_cpu(buf, &key, 0); + /* find related dir_item */ + key.objectid = parent; + key.type = BTRFS_DIR_ITEM_KEY; + key.offset = btrfs_name_hash(namebuf, len); + ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode); + err |= ret; - btrfs_init_path(&path); - path.lowest_level = level + 1; - ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0); - if (ret < 0) - return 0; + len = sizeof(*extref) + name_len; + extref = (struct btrfs_inode_extref *)((char *)extref + len); + cur += len; - parent = path.nodes[level + 1]; - if (parent && buf->start == btrfs_node_blockptr(parent, - path.slots[level + 1])) - found = 1; + if (cur < total) + goto next; - btrfs_release_path(&path); - return found ? 0 : 1; + return err; } -static int is_extent_tree_record(struct extent_record *rec) +/* + * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified + * DIR_ITEM/DIR_INDEX match. + * + * @root: the root of the fs/file tree + * @key: the key of the INODE_REF/INODE_EXTREF + * @name: the name in the INODE_REF/INODE_EXTREF + * @namelen: the length of name in the INODE_REF/INODE_EXTREF + * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index + * to (u64)-1 + * @ext_ref: the EXTENDED_IREF feature + * + * Return 0 if no error occurred. 
+ * Return >0 for error bitmap + */ +static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key, + char *name, int namelen, u64 index, + unsigned int ext_ref) { - struct list_head *cur = rec->backrefs.next; - struct extent_backref *node; - struct tree_backref *back; - int is_extent = 0; + struct btrfs_path path; + struct btrfs_inode_ref *ref; + struct btrfs_inode_extref *extref; + struct extent_buffer *node; + char ref_namebuf[BTRFS_NAME_LEN] = {0}; + u32 total; + u32 cur = 0; + u32 len; + u32 ref_namelen; + u64 ref_index; + u64 parent; + u64 dir_id; + int slot; + int ret; - while(cur != &rec->backrefs) { - node = list_entry(cur, struct extent_backref, list); - cur = cur->next; - if (node->is_data) - return 0; - back = (struct tree_backref *)node; - if (node->full_backref) - return 0; - if (back->root == BTRFS_EXTENT_TREE_OBJECTID) - is_extent = 1; + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, root, key, &path, 0, 0); + if (ret) { + ret = INODE_REF_MISSING; + goto extref; } - return is_extent; -} - - -static int record_bad_block_io(struct btrfs_fs_info *info, - struct cache_tree *extent_cache, - u64 start, u64 len) -{ - struct extent_record *rec; - struct cache_extent *cache; - struct btrfs_key key; - cache = lookup_cache_extent(extent_cache, start, len); - if (!cache) - return 0; + node = path.nodes[0]; + slot = path.slots[0]; - rec = container_of(cache, struct extent_record, cache); - if (!is_extent_tree_record(rec)) - return 0; + ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref); + total = btrfs_item_size_nr(node, slot); - btrfs_disk_key_to_cpu(&key, &rec->parent_key); - return btrfs_add_corrupt_extent_record(info, &key, start, len, 0); -} + /* Iterate all entry of INODE_REF */ + while (cur < total) { + ret = INODE_REF_MISSING; -static int swap_values(struct btrfs_root *root, struct btrfs_path *path, - struct extent_buffer *buf, int slot) -{ - if (btrfs_header_level(buf)) { - struct btrfs_key_ptr ptr1, ptr2; + ref_namelen = 
btrfs_inode_ref_name_len(node, ref); + ref_index = btrfs_inode_ref_index(node, ref); + if (index != (u64)-1 && index != ref_index) + goto next_ref; - read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot), - sizeof(struct btrfs_key_ptr)); - read_extent_buffer(buf, &ptr2, - btrfs_node_key_ptr_offset(slot + 1), - sizeof(struct btrfs_key_ptr)); - write_extent_buffer(buf, &ptr1, - btrfs_node_key_ptr_offset(slot + 1), - sizeof(struct btrfs_key_ptr)); - write_extent_buffer(buf, &ptr2, - btrfs_node_key_ptr_offset(slot), - sizeof(struct btrfs_key_ptr)); - if (slot == 0) { - struct btrfs_disk_key key; - btrfs_node_key(buf, &key, 0); - btrfs_fixup_low_keys(root, path, &key, - btrfs_header_level(buf) + 1); + if (ref_namelen <= BTRFS_NAME_LEN) { + len = ref_namelen; + } else { + len = BTRFS_NAME_LEN; + warning("root %llu INODE %s[%llu %llu] name too long", + root->objectid, + key->type == BTRFS_INODE_REF_KEY ? + "REF" : "EXTREF", + key->objectid, key->offset); } - } else { - struct btrfs_item *item1, *item2; - struct btrfs_key k1, k2; - char *item1_data, *item2_data; - u32 item1_offset, item2_offset, item1_size, item2_size; + read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1), + len); - item1 = btrfs_item_nr(slot); - item2 = btrfs_item_nr(slot + 1); - btrfs_item_key_to_cpu(buf, &k1, slot); - btrfs_item_key_to_cpu(buf, &k2, slot + 1); - item1_offset = btrfs_item_offset(buf, item1); - item2_offset = btrfs_item_offset(buf, item2); - item1_size = btrfs_item_size(buf, item1); - item2_size = btrfs_item_size(buf, item2); + if (len != namelen || strncmp(ref_namebuf, name, len)) + goto next_ref; - item1_data = malloc(item1_size); - if (!item1_data) - return -ENOMEM; - item2_data = malloc(item2_size); - if (!item2_data) { - free(item1_data); - return -ENOMEM; - } + ret = 0; + goto out; +next_ref: + len = sizeof(*ref) + ref_namelen; + ref = (struct btrfs_inode_ref *)((char *)ref + len); + cur += len; + } - read_extent_buffer(buf, item1_data, item1_offset, item1_size); 
- read_extent_buffer(buf, item2_data, item2_offset, item2_size); +extref: + /* Skip if not support EXTENDED_IREF feature */ + if (!ext_ref) + goto out; - write_extent_buffer(buf, item1_data, item2_offset, item2_size); - write_extent_buffer(buf, item2_data, item1_offset, item1_size); - free(item1_data); - free(item2_data); + btrfs_release_path(&path); + btrfs_init_path(&path); - btrfs_set_item_offset(buf, item1, item2_offset); - btrfs_set_item_offset(buf, item2, item1_offset); - btrfs_set_item_size(buf, item1, item2_size); - btrfs_set_item_size(buf, item2, item1_size); + dir_id = key->offset; + key->type = BTRFS_INODE_EXTREF_KEY; + key->offset = btrfs_extref_hash(dir_id, name, namelen); - path->slots[0] = slot; - btrfs_set_item_key_unsafe(root, path, &k2); - path->slots[0] = slot + 1; - btrfs_set_item_key_unsafe(root, path, &k1); + ret = btrfs_search_slot(NULL, root, key, &path, 0, 0); + if (ret) { + ret = INODE_REF_MISSING; + goto out; } - return 0; -} -static int fix_key_order(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path) -{ - struct extent_buffer *buf; - struct btrfs_key k1, k2; - int i; - int level = path->lowest_level; - int ret = -EIO; + node = path.nodes[0]; + slot = path.slots[0]; - buf = path->nodes[level]; - for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) { - if (level) { - btrfs_node_key_to_cpu(buf, &k1, i); - btrfs_node_key_to_cpu(buf, &k2, i + 1); - } else { - btrfs_item_key_to_cpu(buf, &k1, i); - btrfs_item_key_to_cpu(buf, &k2, i + 1); - } - if (btrfs_comp_cpu_keys(&k1, &k2) < 0) - continue; - ret = swap_values(root, path, buf, i); - if (ret) - break; - btrfs_mark_buffer_dirty(buf); - i = 0; - } - return ret; -} - -static int delete_bogus_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - struct extent_buffer *buf, int slot) -{ - struct btrfs_key key; - int nritems = btrfs_header_nritems(buf); + extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref); + cur 
= 0; + total = btrfs_item_size_nr(node, slot); - btrfs_item_key_to_cpu(buf, &key, slot); + /* Iterate all entry of INODE_EXTREF */ + while (cur < total) { + ret = INODE_REF_MISSING; - /* These are all the keys we can deal with missing. */ - if (key.type != BTRFS_DIR_INDEX_KEY && - key.type != BTRFS_EXTENT_ITEM_KEY && - key.type != BTRFS_METADATA_ITEM_KEY && - key.type != BTRFS_TREE_BLOCK_REF_KEY && - key.type != BTRFS_EXTENT_DATA_REF_KEY) - return -1; + ref_namelen = btrfs_inode_extref_name_len(node, extref); + ref_index = btrfs_inode_extref_index(node, extref); + parent = btrfs_inode_extref_parent(node, extref); + if (index != (u64)-1 && index != ref_index) + goto next_extref; - printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n", - (unsigned long long)key.objectid, key.type, - (unsigned long long)key.offset, slot, buf->start); - memmove_extent_buffer(buf, btrfs_item_nr_offset(slot), - btrfs_item_nr_offset(slot + 1), - sizeof(struct btrfs_item) * - (nritems - slot - 1)); - btrfs_set_header_nritems(buf, nritems - 1); - if (slot == 0) { - struct btrfs_disk_key disk_key; + if (parent != dir_id) + goto next_extref; - btrfs_item_key(buf, &disk_key, 0); - btrfs_fixup_low_keys(root, path, &disk_key, 1); - } - btrfs_mark_buffer_dirty(buf); - return 0; -} + if (ref_namelen <= BTRFS_NAME_LEN) { + len = ref_namelen; + } else { + len = BTRFS_NAME_LEN; + warning("Warning: root %llu INODE %s[%llu %llu] name too long\n", + root->objectid, + key->type == BTRFS_INODE_REF_KEY ? 
+ "REF" : "EXTREF", + key->objectid, key->offset); + } + read_extent_buffer(node, ref_namebuf, + (unsigned long)(extref + 1), len); -static int fix_item_offset(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path) -{ - struct extent_buffer *buf; - int i; - int ret = 0; + if (len != namelen || strncmp(ref_namebuf, name, len)) + goto next_extref; - /* We should only get this for leaves */ - BUG_ON(path->lowest_level); - buf = path->nodes[0]; -again: - for (i = 0; i < btrfs_header_nritems(buf); i++) { - unsigned int shift = 0, offset; + ret = 0; + goto out; - if (i == 0 && btrfs_item_end_nr(buf, i) != - BTRFS_LEAF_DATA_SIZE(root)) { - if (btrfs_item_end_nr(buf, i) > - BTRFS_LEAF_DATA_SIZE(root)) { - ret = delete_bogus_item(trans, root, path, - buf, i); - if (!ret) - goto again; - fprintf(stderr, "item is off the end of the " - "leaf, can't fix\n"); - ret = -EIO; - break; - } - shift = BTRFS_LEAF_DATA_SIZE(root) - - btrfs_item_end_nr(buf, i); - } else if (i > 0 && btrfs_item_end_nr(buf, i) != - btrfs_item_offset_nr(buf, i - 1)) { - if (btrfs_item_end_nr(buf, i) > - btrfs_item_offset_nr(buf, i - 1)) { - ret = delete_bogus_item(trans, root, path, - buf, i); - if (!ret) - goto again; - fprintf(stderr, "items overlap, can't fix\n"); - ret = -EIO; - break; - } - shift = btrfs_item_offset_nr(buf, i - 1) - - btrfs_item_end_nr(buf, i); - } - if (!shift) - continue; +next_extref: + len = sizeof(*extref) + ref_namelen; + extref = (struct btrfs_inode_extref *)((char *)extref + len); + cur += len; - printf("Shifting item nr %d by %u bytes in block %llu\n", - i, shift, (unsigned long long)buf->start); - offset = btrfs_item_offset_nr(buf, i); - memmove_extent_buffer(buf, - btrfs_leaf_data(buf) + offset + shift, - btrfs_leaf_data(buf) + offset, - btrfs_item_size_nr(buf, i)); - btrfs_set_item_offset(buf, btrfs_item_nr(i), - offset + shift); - btrfs_mark_buffer_dirty(buf); } - - /* - * We may have moved things, in which case we want to exit so we don't 
- * write those changes out. Once we have proper abort functionality in - * progs this can be changed to something nicer. - */ - BUG_ON(ret); +out: + btrfs_release_path(&path); return ret; } /* - * Attempt to fix basic block failures. If we can't fix it for whatever reason - * then just return -EIO. + * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and + * call find_inode_ref() to check related INODE_REF/INODE_EXTREF. + * + * @root: the root of the fs/file tree + * @key: the key of the INODE_REF/INODE_EXTREF + * @size: the st_size of the INODE_ITEM + * @ext_ref: the EXTENDED_IREF feature + * + * Return 0 if no error occurred. */ -static int try_to_fix_bad_block(struct btrfs_root *root, - struct extent_buffer *buf, - enum btrfs_tree_block_status status) +static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key, + struct extent_buffer *node, int slot, u64 *size, + unsigned int ext_ref) { - struct btrfs_trans_handle *trans; - struct ulist *roots; - struct ulist_node *node; - struct btrfs_root *search_root; - struct btrfs_path *path; - struct ulist_iterator iter; - struct btrfs_key root_key, key; + struct btrfs_dir_item *di; + struct btrfs_inode_item *ii; + struct btrfs_path path; + struct btrfs_key location; + char namebuf[BTRFS_NAME_LEN] = {0}; + u32 total; + u32 cur = 0; + u32 len; + u32 name_len; + u32 data_len; + u8 filetype; + u32 mode; + u64 index; int ret; + int err = 0; - if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER && - status != BTRFS_TREE_BLOCK_INVALID_OFFSETS) - return -EIO; - - path = btrfs_alloc_path(); - if (!path) - return -EIO; - - ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, - 0, &roots); - if (ret) { - btrfs_free_path(path); - return -EIO; - } + /* + * For DIR_ITEM set index to (u64)-1, so that find_inode_ref + * ignore index check. + */ + index = (key->type == BTRFS_DIR_INDEX_KEY) ? 
key->offset : (u64)-1; - ULIST_ITER_INIT(&iter); - while ((node = ulist_next(roots, &iter))) { - root_key.objectid = node->val; - root_key.type = BTRFS_ROOT_ITEM_KEY; - root_key.offset = (u64)-1; + di = btrfs_item_ptr(node, slot, struct btrfs_dir_item); + total = btrfs_item_size_nr(node, slot); - search_root = btrfs_read_fs_root(root->fs_info, &root_key); - if (IS_ERR(root)) { - ret = -EIO; - break; + while (cur < total) { + data_len = btrfs_dir_data_len(node, di); + if (data_len) + error("root %llu %s[%llu %llu] data_len shouldn't be %u", + root->objectid, key->type == BTRFS_DIR_ITEM_KEY ? + "DIR_ITEM" : "DIR_INDEX", + key->objectid, key->offset, data_len); + + name_len = btrfs_dir_name_len(node, di); + if (name_len <= BTRFS_NAME_LEN) { + len = name_len; + } else { + len = BTRFS_NAME_LEN; + warning("root %llu %s[%llu %llu] name too long", + root->objectid, + key->type == BTRFS_DIR_ITEM_KEY ? + "DIR_ITEM" : "DIR_INDEX", + key->objectid, key->offset); } + (*size) += name_len; + read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len); + filetype = btrfs_dir_type(node, di); - trans = btrfs_start_transaction(search_root, 0); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - break; - } + btrfs_init_path(&path); + btrfs_dir_item_key_to_cpu(node, di, &location); - path->lowest_level = btrfs_header_level(buf); - path->skip_check_block = 1; - if (path->lowest_level) - btrfs_node_key_to_cpu(buf, &key, 0); - else - btrfs_item_key_to_cpu(buf, &key, 0); - ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1); + /* Ignore related ROOT_ITEM check */ + if (location.type == BTRFS_ROOT_ITEM_KEY) + goto next; + + /* Check relative INODE_ITEM(existence/filetype) */ + ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0); if (ret) { - ret = -EIO; - btrfs_commit_transaction(trans, search_root); - break; + err |= INODE_ITEM_MISSING; + error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x", + root->objectid, 
key->type == BTRFS_DIR_ITEM_KEY ? + "DIR_ITEM" : "DIR_INDEX", key->objectid, + key->offset, location.objectid, name_len, + namebuf, filetype); + goto next; } - if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER) - ret = fix_key_order(trans, search_root, path); - else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS) - ret = fix_item_offset(trans, search_root, path); - if (ret) { - btrfs_commit_transaction(trans, search_root); + + ii = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_inode_item); + mode = btrfs_inode_mode(path.nodes[0], ii); + + if (imode_to_type(mode) != filetype) { + err |= INODE_ITEM_MISMATCH; + error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d", + root->objectid, key->type == BTRFS_DIR_ITEM_KEY ? + "DIR_ITEM" : "DIR_INDEX", key->objectid, + key->offset, name_len, namebuf, filetype); + } + + /* Check relative INODE_REF/INODE_EXTREF */ + location.type = BTRFS_INODE_REF_KEY; + location.offset = key->objectid; + ret = find_inode_ref(root, &location, namebuf, len, + index, ext_ref); + err |= ret; + if (ret & INODE_REF_MISSING) + error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d", + root->objectid, key->type == BTRFS_DIR_ITEM_KEY ? 
+ "DIR_ITEM" : "DIR_INDEX", key->objectid, + key->offset, name_len, namebuf, filetype); + +next: + btrfs_release_path(&path); + len = sizeof(*di) + name_len + data_len; + di = (struct btrfs_dir_item *)((char *)di + len); + cur += len; + + if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) { + error("root %llu DIR_INDEX[%llu %llu] should contain only one entry", + root->objectid, key->objectid, key->offset); break; } - btrfs_release_path(path); - btrfs_commit_transaction(trans, search_root); } - ulist_free(roots); - btrfs_free_path(path); - return ret; + + return err; } -static int check_block(struct btrfs_root *root, - struct cache_tree *extent_cache, - struct extent_buffer *buf, u64 flags) +static int all_backpointers_checked(struct extent_record *rec, int print_errs) { - struct extent_record *rec; - struct cache_extent *cache; - struct btrfs_key key; - enum btrfs_tree_block_status status; - int ret = 0; - int level; - - cache = lookup_cache_extent(extent_cache, buf->start, buf->len); - if (!cache) - return 1; - rec = container_of(cache, struct extent_record, cache); - rec->generation = btrfs_header_generation(buf); - - level = btrfs_header_level(buf); - if (btrfs_header_nritems(buf) > 0) { - - if (level == 0) - btrfs_item_key_to_cpu(buf, &key, 0); - else - btrfs_node_key_to_cpu(buf, &key, 0); - - rec->info_objectid = key.objectid; - } - rec->info_level = level; - - if (btrfs_is_leaf(buf)) - status = btrfs_check_leaf(root, &rec->parent_key, buf); - else - status = btrfs_check_node(root, &rec->parent_key, buf); + struct list_head *cur = rec->backrefs.next; + struct extent_backref *back; + struct tree_backref *tback; + struct data_backref *dback; + u64 found = 0; + int err = 0; - if (status != BTRFS_TREE_BLOCK_CLEAN) { - if (repair) - status = try_to_fix_bad_block(root, buf, status); - if (status != BTRFS_TREE_BLOCK_CLEAN) { - ret = -EIO; - fprintf(stderr, "bad block %llu\n", - (unsigned long long)buf->start); - } else { - /* - * Signal to callers we need to start 
the scan over - * again since we'll have cow'ed blocks. - */ - ret = -EAGAIN; + while(cur != &rec->backrefs) { + back = to_extent_backref(cur); + cur = cur->next; + if (!back->found_extent_tree) { + err = 1; + if (!print_errs) + goto out; + if (back->is_data) { + dback = to_data_backref(back); + fprintf(stderr, "Backref %llu %s %llu" + " owner %llu offset %llu num_refs %lu" + " not found in extent tree\n", + (unsigned long long)rec->start, + back->full_backref ? + "parent" : "root", + back->full_backref ? + (unsigned long long)dback->parent: + (unsigned long long)dback->root, + (unsigned long long)dback->owner, + (unsigned long long)dback->offset, + (unsigned long)dback->num_refs); + } else { + tback = to_tree_backref(back); + fprintf(stderr, "Backref %llu parent %llu" + " root %llu not found in extent tree\n", + (unsigned long long)rec->start, + (unsigned long long)tback->parent, + (unsigned long long)tback->root); + } } - } else { - rec->content_checked = 1; - if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) - rec->owner_ref_checked = 1; - else { - ret = check_owner_ref(root, rec, buf); - if (!ret) - rec->owner_ref_checked = 1; + if (!back->is_data && !back->found_ref) { + err = 1; + if (!print_errs) + goto out; + tback = to_tree_backref(back); + fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n", + (unsigned long long)rec->start, + back->full_backref ? "parent" : "root", + back->full_backref ? 
+ (unsigned long long)tback->parent : + (unsigned long long)tback->root, back); } - } - if (!ret) - maybe_free_extent_rec(extent_cache, rec); - return ret; -} - -static struct tree_backref *find_tree_backref(struct extent_record *rec, - u64 parent, u64 root) -{ - struct list_head *cur = rec->backrefs.next; - struct extent_backref *node; - struct tree_backref *back; + if (back->is_data) { + dback = to_data_backref(back); + if (dback->found_ref != dback->num_refs) { + err = 1; + if (!print_errs) + goto out; + fprintf(stderr, "Incorrect local backref count" + " on %llu %s %llu owner %llu" + " offset %llu found %u wanted %u back %p\n", + (unsigned long long)rec->start, + back->full_backref ? + "parent" : "root", + back->full_backref ? + (unsigned long long)dback->parent: + (unsigned long long)dback->root, + (unsigned long long)dback->owner, + (unsigned long long)dback->offset, + dback->found_ref, dback->num_refs, back); + } + if (dback->disk_bytenr != rec->start) { + err = 1; + if (!print_errs) + goto out; + fprintf(stderr, "Backref disk bytenr does not" + " match extent record, bytenr=%llu, " + "ref bytenr=%llu\n", + (unsigned long long)rec->start, + (unsigned long long)dback->disk_bytenr); + } - while(cur != &rec->backrefs) { - node = list_entry(cur, struct extent_backref, list); - cur = cur->next; - if (node->is_data) - continue; - back = (struct tree_backref *)node; - if (parent > 0) { - if (!node->full_backref) - continue; - if (parent == back->parent) - return back; + if (dback->bytes != rec->nr) { + err = 1; + if (!print_errs) + goto out; + fprintf(stderr, "Backref bytes do not match " + "extent backref, bytenr=%llu, ref " + "bytes=%llu, backref bytes=%llu\n", + (unsigned long long)rec->start, + (unsigned long long)rec->nr, + (unsigned long long)dback->bytes); + } + } + if (!back->is_data) { + found += 1; } else { - if (node->full_backref) - continue; - if (back->root == root) - return back; + dback = to_data_backref(back); + found += dback->found_ref; } } - 
return NULL; + if (found != rec->refs) { + err = 1; + if (!print_errs) + goto out; + fprintf(stderr, "Incorrect global backref count " + "on %llu found %llu wanted %llu\n", + (unsigned long long)rec->start, + (unsigned long long)found, + (unsigned long long)rec->refs); + } +out: + return err; } -static struct tree_backref *alloc_tree_backref(struct extent_record *rec, - u64 parent, u64 root) +static int free_all_extent_backrefs(struct extent_record *rec) { - struct tree_backref *ref = malloc(sizeof(*ref)); - - if (!ref) - return NULL; - memset(&ref->node, 0, sizeof(ref->node)); - if (parent > 0) { - ref->parent = parent; - ref->node.full_backref = 1; - } else { - ref->root = root; - ref->node.full_backref = 0; + struct extent_backref *back; + struct list_head *cur; + while (!list_empty(&rec->backrefs)) { + cur = rec->backrefs.next; + back = to_extent_backref(cur); + list_del(cur); + free(back); } - list_add_tail(&ref->node.list, &rec->backrefs); - - return ref; + return 0; } -static struct data_backref *find_data_backref(struct extent_record *rec, - u64 parent, u64 root, - u64 owner, u64 offset, - int found_ref, - u64 disk_bytenr, u64 bytes) +static void free_extent_record_cache(struct btrfs_fs_info *fs_info, + struct cache_tree *extent_cache) { - struct list_head *cur = rec->backrefs.next; - struct extent_backref *node; - struct data_backref *back; + struct cache_extent *cache; + struct extent_record *rec; - while(cur != &rec->backrefs) { - node = list_entry(cur, struct extent_backref, list); - cur = cur->next; - if (!node->is_data) - continue; - back = (struct data_backref *)node; - if (parent > 0) { - if (!node->full_backref) - continue; - if (parent == back->parent) - return back; - } else { - if (node->full_backref) - continue; - if (back->root == root && back->owner == owner && - back->offset == offset) { - if (found_ref && node->found_ref && - (back->bytes != bytes || - back->disk_bytenr != disk_bytenr)) - continue; - return back; - } - } + while (1) { + 
cache = first_cache_extent(extent_cache); + if (!cache) + break; + rec = container_of(cache, struct extent_record, cache); + remove_cache_extent(extent_cache, cache); + free_all_extent_backrefs(rec); + free(rec); } - return NULL; } -static struct data_backref *alloc_data_backref(struct extent_record *rec, - u64 parent, u64 root, - u64 owner, u64 offset, - u64 max_size) +static int maybe_free_extent_rec(struct cache_tree *extent_cache, + struct extent_record *rec) { - struct data_backref *ref = malloc(sizeof(*ref)); - - if (!ref) - return NULL; - memset(&ref->node, 0, sizeof(ref->node)); - ref->node.is_data = 1; - - if (parent > 0) { - ref->parent = parent; - ref->owner = 0; - ref->offset = 0; - ref->node.full_backref = 1; - } else { - ref->root = root; - ref->owner = owner; - ref->offset = offset; - ref->node.full_backref = 0; + if (rec->content_checked && rec->owner_ref_checked && + rec->extent_item_refs == rec->refs && rec->refs > 0 && + rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) && + !rec->bad_full_backref && !rec->crossing_stripes && + !rec->wrong_chunk_type) { + remove_cache_extent(extent_cache, &rec->cache); + free_all_extent_backrefs(rec); + list_del_init(&rec->list); + free(rec); } - ref->bytes = max_size; - ref->found_ref = 0; - ref->num_refs = 0; - list_add_tail(&ref->node.list, &rec->backrefs); - if (max_size > rec->max_size) - rec->max_size = max_size; - return ref; + return 0; } -/* Check if the type of extent matches with its chunk */ -static void check_extent_type(struct extent_record *rec) +static int check_owner_ref(struct btrfs_root *root, + struct extent_record *rec, + struct extent_buffer *buf) { - struct btrfs_block_group_cache *bg_cache; - - bg_cache = btrfs_lookup_first_block_group(global_info, rec->start); - if (!bg_cache) - return; + struct extent_backref *node; + struct tree_backref *back; + struct btrfs_root *ref_root; + struct btrfs_key key; + struct btrfs_path path; + struct extent_buffer *parent; + int level; + int 
found = 0; + int ret; - /* data extent, check chunk directly*/ - if (!rec->metadata) { - if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA)) - rec->wrong_chunk_type = 1; - return; - } - - /* metadata extent, check the obvious case first */ - if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM | - BTRFS_BLOCK_GROUP_METADATA))) { - rec->wrong_chunk_type = 1; - return; - } - - /* - * Check SYSTEM extent, as it's also marked as metadata, we can only - * make sure it's a SYSTEM extent by its backref - */ - if (!list_empty(&rec->backrefs)) { - struct extent_backref *node; - struct tree_backref *tback; - u64 bg_type; - - node = list_entry(rec->backrefs.next, struct extent_backref, - list); - if (node->is_data) { - /* tree block shouldn't have data backref */ - rec->wrong_chunk_type = 1; - return; - } - tback = container_of(node, struct tree_backref, node); - - if (tback->root == BTRFS_CHUNK_TREE_OBJECTID) - bg_type = BTRFS_BLOCK_GROUP_SYSTEM; - else - bg_type = BTRFS_BLOCK_GROUP_METADATA; - if (!(bg_cache->flags & bg_type)) - rec->wrong_chunk_type = 1; + list_for_each_entry(node, &rec->backrefs, list) { + if (node->is_data) + continue; + if (!node->found_ref) + continue; + if (node->full_backref) + continue; + back = to_tree_backref(node); + if (btrfs_header_owner(buf) == back->root) + return 0; } -} + BUG_ON(rec->is_root); -static int add_extent_rec_nolookup(struct cache_tree *extent_cache, - struct btrfs_key *parent_key, u64 parent_gen, - u64 start, u64 nr, u64 extent_item_refs, - int is_root, int inc_ref, int set_checked, - int metadata, int extent_rec, u64 max_size) -{ - struct extent_record *rec; - int ret = 0; + /* try to find the block by search corresponding fs tree */ + key.objectid = btrfs_header_owner(buf); + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; - rec = malloc(sizeof(*rec)); - if (!rec) - return -ENOMEM; - rec->start = start; - rec->max_size = max_size; - rec->nr = max(nr, max_size); - rec->found_rec = !!extent_rec; - rec->content_checked = 0; - 
rec->owner_ref_checked = 0; - rec->num_duplicates = 0; - rec->metadata = metadata; - rec->flag_block_full_backref = -1; - rec->bad_full_backref = 0; - rec->crossing_stripes = 0; - rec->wrong_chunk_type = 0; - INIT_LIST_HEAD(&rec->backrefs); - INIT_LIST_HEAD(&rec->dups); - INIT_LIST_HEAD(&rec->list); + ref_root = btrfs_read_fs_root(root->fs_info, &key); + if (IS_ERR(ref_root)) + return 1; - if (is_root) - rec->is_root = 1; + level = btrfs_header_level(buf); + if (level == 0) + btrfs_item_key_to_cpu(buf, &key, 0); else - rec->is_root = 0; + btrfs_node_key_to_cpu(buf, &key, 0); - if (inc_ref) - rec->refs = 1; - else - rec->refs = 0; + btrfs_init_path(&path); + path.lowest_level = level + 1; + ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0); + if (ret < 0) + return 0; - if (extent_item_refs) - rec->extent_item_refs = extent_item_refs; - else - rec->extent_item_refs = 0; + parent = path.nodes[level + 1]; + if (parent && buf->start == btrfs_node_blockptr(parent, + path.slots[level + 1])) + found = 1; - if (parent_key) - btrfs_cpu_key_to_disk(&rec->parent_key, parent_key); - else - memset(&rec->parent_key, 0, sizeof(*parent_key)); + btrfs_release_path(&path); + return found ? 
0 : 1; +} - if (parent_gen) - rec->parent_generation = parent_gen; - else - rec->parent_generation = 0; +static int is_extent_tree_record(struct extent_record *rec) +{ + struct list_head *cur = rec->backrefs.next; + struct extent_backref *node; + struct tree_backref *back; + int is_extent = 0; - rec->cache.start = start; - rec->cache.size = nr; - ret = insert_cache_extent(extent_cache, &rec->cache); - BUG_ON(ret); - bytes_used += nr; - if (set_checked) { - rec->content_checked = 1; - rec->owner_ref_checked = 1; + while(cur != &rec->backrefs) { + node = to_extent_backref(cur); + cur = cur->next; + if (node->is_data) + return 0; + back = to_tree_backref(node); + if (node->full_backref) + return 0; + if (back->root == BTRFS_EXTENT_TREE_OBJECTID) + is_extent = 1; } - - if (metadata) - rec->crossing_stripes = check_crossing_stripes(rec->start, - rec->max_size); - check_extent_type(rec); - return ret; + return is_extent; } -static int add_extent_rec(struct cache_tree *extent_cache, - struct btrfs_key *parent_key, u64 parent_gen, - u64 start, u64 nr, u64 extent_item_refs, - int is_root, int inc_ref, int set_checked, - int metadata, int extent_rec, u64 max_size) + +static int record_bad_block_io(struct btrfs_fs_info *info, + struct cache_tree *extent_cache, + u64 start, u64 len) { struct extent_record *rec; struct cache_extent *cache; - int ret = 0; - int dup = 0; + struct btrfs_key key; - cache = lookup_cache_extent(extent_cache, start, nr); - if (cache) { - rec = container_of(cache, struct extent_record, cache); - if (inc_ref) - rec->refs++; - if (rec->nr == 1) - rec->nr = max(nr, max_size); + cache = lookup_cache_extent(extent_cache, start, len); + if (!cache) + return 0; - /* - * We need to make sure to reset nr to whatever the extent - * record says was the real size, this way we can compare it to - * the backrefs. 
- */ - if (extent_rec) { - if (start != rec->start || rec->found_rec) { - struct extent_record *tmp; + rec = container_of(cache, struct extent_record, cache); + if (!is_extent_tree_record(rec)) + return 0; - dup = 1; - if (list_empty(&rec->list)) - list_add_tail(&rec->list, - &duplicate_extents); + btrfs_disk_key_to_cpu(&key, &rec->parent_key); + return btrfs_add_corrupt_extent_record(info, &key, start, len, 0); +} - /* - * We have to do this song and dance in case we - * find an extent record that falls inside of - * our current extent record but does not have - * the same objectid. - */ - tmp = malloc(sizeof(*tmp)); - if (!tmp) - return -ENOMEM; - tmp->start = start; - tmp->max_size = max_size; - tmp->nr = nr; - tmp->found_rec = 1; - tmp->metadata = metadata; - tmp->extent_item_refs = extent_item_refs; - INIT_LIST_HEAD(&tmp->list); - list_add_tail(&tmp->list, &rec->dups); - rec->num_duplicates++; - } else { - rec->nr = nr; - rec->found_rec = 1; - } - } +static int swap_values(struct btrfs_root *root, struct btrfs_path *path, + struct extent_buffer *buf, int slot) +{ + if (btrfs_header_level(buf)) { + struct btrfs_key_ptr ptr1, ptr2; - if (extent_item_refs && !dup) { - if (rec->extent_item_refs) { - fprintf(stderr, "block %llu rec " - "extent_item_refs %llu, passed %llu\n", - (unsigned long long)start, - (unsigned long long) - rec->extent_item_refs, - (unsigned long long)extent_item_refs); - } - rec->extent_item_refs = extent_item_refs; - } - if (is_root) - rec->is_root = 1; - if (set_checked) { - rec->content_checked = 1; - rec->owner_ref_checked = 1; + read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot), + sizeof(struct btrfs_key_ptr)); + read_extent_buffer(buf, &ptr2, + btrfs_node_key_ptr_offset(slot + 1), + sizeof(struct btrfs_key_ptr)); + write_extent_buffer(buf, &ptr1, + btrfs_node_key_ptr_offset(slot + 1), + sizeof(struct btrfs_key_ptr)); + write_extent_buffer(buf, &ptr2, + btrfs_node_key_ptr_offset(slot), + sizeof(struct btrfs_key_ptr)); + if 
(slot == 0) { + struct btrfs_disk_key key; + btrfs_node_key(buf, &key, 0); + btrfs_fixup_low_keys(root, path, &key, + btrfs_header_level(buf) + 1); } + } else { + struct btrfs_item *item1, *item2; + struct btrfs_key k1, k2; + char *item1_data, *item2_data; + u32 item1_offset, item2_offset, item1_size, item2_size; + + item1 = btrfs_item_nr(slot); + item2 = btrfs_item_nr(slot + 1); + btrfs_item_key_to_cpu(buf, &k1, slot); + btrfs_item_key_to_cpu(buf, &k2, slot + 1); + item1_offset = btrfs_item_offset(buf, item1); + item2_offset = btrfs_item_offset(buf, item2); + item1_size = btrfs_item_size(buf, item1); + item2_size = btrfs_item_size(buf, item2); - if (parent_key) - btrfs_cpu_key_to_disk(&rec->parent_key, parent_key); - if (parent_gen) - rec->parent_generation = parent_gen; + item1_data = malloc(item1_size); + if (!item1_data) + return -ENOMEM; + item2_data = malloc(item2_size); + if (!item2_data) { + free(item1_data); + return -ENOMEM; + } - if (rec->max_size < max_size) - rec->max_size = max_size; + read_extent_buffer(buf, item1_data, item1_offset, item1_size); + read_extent_buffer(buf, item2_data, item2_offset, item2_size); - /* - * A metadata extent can't cross stripe_len boundary, otherwise - * kernel scrub won't be able to handle it. - * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check - * it. 
- */ - if (metadata) - rec->crossing_stripes = check_crossing_stripes( - rec->start, rec->max_size); - check_extent_type(rec); - maybe_free_extent_rec(extent_cache, rec); - return ret; - } + write_extent_buffer(buf, item1_data, item2_offset, item2_size); + write_extent_buffer(buf, item2_data, item1_offset, item1_size); + free(item1_data); + free(item2_data); - ret = add_extent_rec_nolookup(extent_cache, parent_key, parent_gen, - start, nr, extent_item_refs, is_root, inc_ref, - set_checked, metadata, extent_rec, max_size); + btrfs_set_item_offset(buf, item1, item2_offset); + btrfs_set_item_offset(buf, item2, item1_offset); + btrfs_set_item_size(buf, item1, item2_size); + btrfs_set_item_size(buf, item2, item1_size); - return ret; + path->slots[0] = slot; + btrfs_set_item_key_unsafe(root, path, &k2); + path->slots[0] = slot + 1; + btrfs_set_item_key_unsafe(root, path, &k1); + } + return 0; } -static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr, - u64 parent, u64 root, int found_ref) +static int fix_key_order(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path) { - struct extent_record *rec; - struct tree_backref *back; - struct cache_extent *cache; - - cache = lookup_cache_extent(extent_cache, bytenr, 1); - if (!cache) { - add_extent_rec(extent_cache, NULL, 0, bytenr, - 1, 0, 0, 0, 0, 1, 0, 0); - cache = lookup_cache_extent(extent_cache, bytenr, 1); - if (!cache) - abort(); - } + struct extent_buffer *buf; + struct btrfs_key k1, k2; + int i; + int level = path->lowest_level; + int ret = -EIO; - rec = container_of(cache, struct extent_record, cache); - if (rec->start != bytenr) { - abort(); + buf = path->nodes[level]; + for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) { + if (level) { + btrfs_node_key_to_cpu(buf, &k1, i); + btrfs_node_key_to_cpu(buf, &k2, i + 1); + } else { + btrfs_item_key_to_cpu(buf, &k1, i); + btrfs_item_key_to_cpu(buf, &k2, i + 1); + } + if (btrfs_comp_cpu_keys(&k1, &k2) < 0) + continue; + 
ret = swap_values(root, path, buf, i); + if (ret) + break; + btrfs_mark_buffer_dirty(buf); + i = 0; } + return ret; +} - back = find_tree_backref(rec, parent, root); - if (!back) { - back = alloc_tree_backref(rec, parent, root); - BUG_ON(!back); - } +static int delete_bogus_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *buf, int slot) +{ + struct btrfs_key key; + int nritems = btrfs_header_nritems(buf); - if (found_ref) { - if (back->node.found_ref) { - fprintf(stderr, "Extent back ref already exists " - "for %llu parent %llu root %llu \n", - (unsigned long long)bytenr, - (unsigned long long)parent, - (unsigned long long)root); - } - back->node.found_ref = 1; - } else { - if (back->node.found_extent_tree) { - fprintf(stderr, "Extent back ref already exists " - "for %llu parent %llu root %llu \n", - (unsigned long long)bytenr, - (unsigned long long)parent, - (unsigned long long)root); - } - back->node.found_extent_tree = 1; + btrfs_item_key_to_cpu(buf, &key, slot); + + /* These are all the keys we can deal with missing. 
*/ + if (key.type != BTRFS_DIR_INDEX_KEY && + key.type != BTRFS_EXTENT_ITEM_KEY && + key.type != BTRFS_METADATA_ITEM_KEY && + key.type != BTRFS_TREE_BLOCK_REF_KEY && + key.type != BTRFS_EXTENT_DATA_REF_KEY) + return -1; + + printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n", + (unsigned long long)key.objectid, key.type, + (unsigned long long)key.offset, slot, buf->start); + memmove_extent_buffer(buf, btrfs_item_nr_offset(slot), + btrfs_item_nr_offset(slot + 1), + sizeof(struct btrfs_item) * + (nritems - slot - 1)); + btrfs_set_header_nritems(buf, nritems - 1); + if (slot == 0) { + struct btrfs_disk_key disk_key; + + btrfs_item_key(buf, &disk_key, 0); + btrfs_fixup_low_keys(root, path, &disk_key, 1); } - check_extent_type(rec); - maybe_free_extent_rec(extent_cache, rec); + btrfs_mark_buffer_dirty(buf); return 0; } -static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr, - u64 parent, u64 root, u64 owner, u64 offset, - u32 num_refs, int found_ref, u64 max_size) +static int fix_item_offset(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path) { - struct extent_record *rec; - struct data_backref *back; - struct cache_extent *cache; + struct extent_buffer *buf; + int i; + int ret = 0; - cache = lookup_cache_extent(extent_cache, bytenr, 1); - if (!cache) { - add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0, - 0, 0, max_size); - cache = lookup_cache_extent(extent_cache, bytenr, 1); - if (!cache) - abort(); - } + /* We should only get this for leaves */ + BUG_ON(path->lowest_level); + buf = path->nodes[0]; +again: + for (i = 0; i < btrfs_header_nritems(buf); i++) { + unsigned int shift = 0, offset; - rec = container_of(cache, struct extent_record, cache); - if (rec->max_size < max_size) - rec->max_size = max_size; + if (i == 0 && btrfs_item_end_nr(buf, i) != + BTRFS_LEAF_DATA_SIZE(root)) { + if (btrfs_item_end_nr(buf, i) > + BTRFS_LEAF_DATA_SIZE(root)) { + ret = delete_bogus_item(trans, root, 
path, + buf, i); + if (!ret) + goto again; + fprintf(stderr, "item is off the end of the " + "leaf, can't fix\n"); + ret = -EIO; + break; + } + shift = BTRFS_LEAF_DATA_SIZE(root) - + btrfs_item_end_nr(buf, i); + } else if (i > 0 && btrfs_item_end_nr(buf, i) != + btrfs_item_offset_nr(buf, i - 1)) { + if (btrfs_item_end_nr(buf, i) > + btrfs_item_offset_nr(buf, i - 1)) { + ret = delete_bogus_item(trans, root, path, + buf, i); + if (!ret) + goto again; + fprintf(stderr, "items overlap, can't fix\n"); + ret = -EIO; + break; + } + shift = btrfs_item_offset_nr(buf, i - 1) - + btrfs_item_end_nr(buf, i); + } + if (!shift) + continue; - /* - * If found_ref is set then max_size is the real size and must match the - * existing refs. So if we have already found a ref then we need to - * make sure that this ref matches the existing one, otherwise we need - * to add a new backref so we can notice that the backrefs don't match - * and we need to figure out who is telling the truth. This is to - * account for that awful fsync bug I introduced where we'd end up with - * a btrfs_file_extent_item that would have its length include multiple - * prealloc extents or point inside of a prealloc extent. 
- */ - back = find_data_backref(rec, parent, root, owner, offset, found_ref, - bytenr, max_size); - if (!back) { - back = alloc_data_backref(rec, parent, root, owner, offset, - max_size); - BUG_ON(!back); + printf("Shifting item nr %d by %u bytes in block %llu\n", + i, shift, (unsigned long long)buf->start); + offset = btrfs_item_offset_nr(buf, i); + memmove_extent_buffer(buf, + btrfs_leaf_data(buf) + offset + shift, + btrfs_leaf_data(buf) + offset, + btrfs_item_size_nr(buf, i)); + btrfs_set_item_offset(buf, btrfs_item_nr(i), + offset + shift); + btrfs_mark_buffer_dirty(buf); } - if (found_ref) { - BUG_ON(num_refs != 1); - if (back->node.found_ref) - BUG_ON(back->bytes != max_size); - back->node.found_ref = 1; - back->found_ref += 1; - back->bytes = max_size; - back->disk_bytenr = bytenr; - rec->refs += 1; - rec->content_checked = 1; - rec->owner_ref_checked = 1; - } else { - if (back->node.found_extent_tree) { - fprintf(stderr, "Extent back ref already exists " - "for %llu parent %llu root %llu " - "owner %llu offset %llu num_refs %lu\n", - (unsigned long long)bytenr, - (unsigned long long)parent, - (unsigned long long)root, - (unsigned long long)owner, - (unsigned long long)offset, - (unsigned long)num_refs); - } - back->num_refs = num_refs; - back->node.found_extent_tree = 1; - } - maybe_free_extent_rec(extent_cache, rec); - return 0; + /* + * We may have moved things, in which case we want to exit so we don't + * write those changes out. Once we have proper abort functionality in + * progs this can be changed to something nicer. + */ + BUG_ON(ret); + return ret; } -static int add_pending(struct cache_tree *pending, - struct cache_tree *seen, u64 bytenr, u32 size) +/* + * Attempt to fix basic block failures. If we can't fix it for whatever reason + * then just return -EIO. 
+ */ +static int try_to_fix_bad_block(struct btrfs_root *root, + struct extent_buffer *buf, + enum btrfs_tree_block_status status) { + struct btrfs_trans_handle *trans; + struct ulist *roots; + struct ulist_node *node; + struct btrfs_root *search_root; + struct btrfs_path path; + struct ulist_iterator iter; + struct btrfs_key root_key, key; int ret; - ret = add_cache_extent(seen, bytenr, size); + + if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER && + status != BTRFS_TREE_BLOCK_INVALID_OFFSETS) + return -EIO; + + ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots); if (ret) - return ret; - add_cache_extent(pending, bytenr, size); - return 0; -} + return -EIO; -static int pick_next_pending(struct cache_tree *pending, - struct cache_tree *reada, - struct cache_tree *nodes, - u64 last, struct block_info *bits, int bits_nr, - int *reada_bits) -{ - unsigned long node_start = last; - struct cache_extent *cache; - int ret; + btrfs_init_path(&path); + ULIST_ITER_INIT(&iter); + while ((node = ulist_next(roots, &iter))) { + root_key.objectid = node->val; + root_key.type = BTRFS_ROOT_ITEM_KEY; + root_key.offset = (u64)-1; - cache = search_cache_extent(reada, 0); - if (cache) { - bits[0].start = cache->start; - bits[0].size = cache->size; - *reada_bits = 1; - return 1; - } - *reada_bits = 0; - if (node_start > 32768) - node_start -= 32768; + search_root = btrfs_read_fs_root(root->fs_info, &root_key); + if (IS_ERR(root)) { + ret = -EIO; + break; + } - cache = search_cache_extent(nodes, node_start); - if (!cache) - cache = search_cache_extent(nodes, 0); - if (!cache) { - cache = search_cache_extent(pending, 0); - if (!cache) - return 0; - ret = 0; - do { - bits[ret].start = cache->start; - bits[ret].size = cache->size; - cache = next_cache_extent(cache); - ret++; - } while (cache && ret < bits_nr); - return ret; - } - - ret = 0; - do { - bits[ret].start = cache->start; - bits[ret].size = cache->size; - cache = next_cache_extent(cache); - ret++; - } while (cache && 
ret < bits_nr); + trans = btrfs_start_transaction(search_root, 0); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + break; + } - if (bits_nr - ret > 8) { - u64 lookup = bits[0].start + bits[0].size; - struct cache_extent *next; - next = search_cache_extent(pending, lookup); - while(next) { - if (next->start - lookup > 32768) - break; - bits[ret].start = next->start; - bits[ret].size = next->size; - lookup = next->start + next->size; - ret++; - if (ret == bits_nr) - break; - next = next_cache_extent(next); - if (!next) - break; + path.lowest_level = btrfs_header_level(buf); + path.skip_check_block = 1; + if (path.lowest_level) + btrfs_node_key_to_cpu(buf, &key, 0); + else + btrfs_item_key_to_cpu(buf, &key, 0); + ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1); + if (ret) { + ret = -EIO; + btrfs_commit_transaction(trans, search_root); + break; + } + if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER) + ret = fix_key_order(trans, search_root, &path); + else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS) + ret = fix_item_offset(trans, search_root, &path); + if (ret) { + btrfs_commit_transaction(trans, search_root); + break; } + btrfs_release_path(&path); + btrfs_commit_transaction(trans, search_root); } + ulist_free(roots); + btrfs_release_path(&path); return ret; } -static void free_chunk_record(struct cache_extent *cache) -{ - struct chunk_record *rec; - - rec = container_of(cache, struct chunk_record, cache); - list_del_init(&rec->list); - list_del_init(&rec->dextents); - free(rec); -} - -void free_chunk_cache_tree(struct cache_tree *chunk_cache) +static int check_block(struct btrfs_root *root, + struct cache_tree *extent_cache, + struct extent_buffer *buf, u64 flags) { - cache_tree_free_extents(chunk_cache, free_chunk_record); -} + struct extent_record *rec; + struct cache_extent *cache; + struct btrfs_key key; + enum btrfs_tree_block_status status; + int ret = 0; + int level; -static void free_device_record(struct rb_node *node) -{ - struct device_record 
*rec; + cache = lookup_cache_extent(extent_cache, buf->start, buf->len); + if (!cache) + return 1; + rec = container_of(cache, struct extent_record, cache); + rec->generation = btrfs_header_generation(buf); - rec = container_of(node, struct device_record, node); - free(rec); -} + level = btrfs_header_level(buf); + if (btrfs_header_nritems(buf) > 0) { -FREE_RB_BASED_TREE(device_cache, free_device_record); + if (level == 0) + btrfs_item_key_to_cpu(buf, &key, 0); + else + btrfs_node_key_to_cpu(buf, &key, 0); -int insert_block_group_record(struct block_group_tree *tree, - struct block_group_record *bg_rec) -{ - int ret; + rec->info_objectid = key.objectid; + } + rec->info_level = level; - ret = insert_cache_extent(&tree->tree, &bg_rec->cache); - if (ret) - return ret; + if (btrfs_is_leaf(buf)) + status = btrfs_check_leaf(root, &rec->parent_key, buf); + else + status = btrfs_check_node(root, &rec->parent_key, buf); - list_add_tail(&bg_rec->list, &tree->block_groups); - return 0; + if (status != BTRFS_TREE_BLOCK_CLEAN) { + if (repair) + status = try_to_fix_bad_block(root, buf, status); + if (status != BTRFS_TREE_BLOCK_CLEAN) { + ret = -EIO; + fprintf(stderr, "bad block %llu\n", + (unsigned long long)buf->start); + } else { + /* + * Signal to callers we need to start the scan over + * again since we'll have cowed blocks. 
+ */ + ret = -EAGAIN; + } + } else { + rec->content_checked = 1; + if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) + rec->owner_ref_checked = 1; + else { + ret = check_owner_ref(root, rec, buf); + if (!ret) + rec->owner_ref_checked = 1; + } + } + if (!ret) + maybe_free_extent_rec(extent_cache, rec); + return ret; } -static void free_block_group_record(struct cache_extent *cache) +static struct tree_backref *find_tree_backref(struct extent_record *rec, + u64 parent, u64 root) { - struct block_group_record *rec; - - rec = container_of(cache, struct block_group_record, cache); - list_del_init(&rec->list); - free(rec); -} + struct list_head *cur = rec->backrefs.next; + struct extent_backref *node; + struct tree_backref *back; -void free_block_group_tree(struct block_group_tree *tree) -{ - cache_tree_free_extents(&tree->tree, free_block_group_record); + while(cur != &rec->backrefs) { + node = to_extent_backref(cur); + cur = cur->next; + if (node->is_data) + continue; + back = to_tree_backref(node); + if (parent > 0) { + if (!node->full_backref) + continue; + if (parent == back->parent) + return back; + } else { + if (node->full_backref) + continue; + if (back->root == root) + return back; + } + } + return NULL; } -int insert_device_extent_record(struct device_extent_tree *tree, - struct device_extent_record *de_rec) +static struct tree_backref *alloc_tree_backref(struct extent_record *rec, + u64 parent, u64 root) { - int ret; + struct tree_backref *ref = malloc(sizeof(*ref)); - /* - * Device extent is a bit different from the other extents, because - * the extents which belong to the different devices may have the - * same start and size, so we need use the special extent cache - * search/insert functions. 
- */ - ret = insert_cache_extent2(&tree->tree, &de_rec->cache); - if (ret) - return ret; + if (!ref) + return NULL; + memset(&ref->node, 0, sizeof(ref->node)); + if (parent > 0) { + ref->parent = parent; + ref->node.full_backref = 1; + } else { + ref->root = root; + ref->node.full_backref = 0; + } + list_add_tail(&ref->node.list, &rec->backrefs); - list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans); - list_add_tail(&de_rec->device_list, &tree->no_device_orphans); - return 0; + return ref; } -static void free_device_extent_record(struct cache_extent *cache) +static struct data_backref *find_data_backref(struct extent_record *rec, + u64 parent, u64 root, + u64 owner, u64 offset, + int found_ref, + u64 disk_bytenr, u64 bytes) { - struct device_extent_record *rec; + struct list_head *cur = rec->backrefs.next; + struct extent_backref *node; + struct data_backref *back; - rec = container_of(cache, struct device_extent_record, cache); - if (!list_empty(&rec->chunk_list)) - list_del_init(&rec->chunk_list); - if (!list_empty(&rec->device_list)) - list_del_init(&rec->device_list); - free(rec); + while(cur != &rec->backrefs) { + node = to_extent_backref(cur); + cur = cur->next; + if (!node->is_data) + continue; + back = to_data_backref(node); + if (parent > 0) { + if (!node->full_backref) + continue; + if (parent == back->parent) + return back; + } else { + if (node->full_backref) + continue; + if (back->root == root && back->owner == owner && + back->offset == offset) { + if (found_ref && node->found_ref && + (back->bytes != bytes || + back->disk_bytenr != disk_bytenr)) + continue; + return back; + } + } + } + return NULL; } -void free_device_extent_tree(struct device_extent_tree *tree) +static struct data_backref *alloc_data_backref(struct extent_record *rec, + u64 parent, u64 root, + u64 owner, u64 offset, + u64 max_size) { - cache_tree_free_extents(&tree->tree, free_device_extent_record); -} + struct data_backref *ref = malloc(sizeof(*ref)); -#ifdef 
BTRFS_COMPAT_EXTENT_TREE_V0 -static int process_extent_ref_v0(struct cache_tree *extent_cache, - struct extent_buffer *leaf, int slot) -{ - struct btrfs_extent_ref_v0 *ref0; - struct btrfs_key key; + if (!ref) + return NULL; + memset(&ref->node, 0, sizeof(ref->node)); + ref->node.is_data = 1; - btrfs_item_key_to_cpu(leaf, &key, slot); - ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0); - if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) { - add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0); + if (parent > 0) { + ref->parent = parent; + ref->owner = 0; + ref->offset = 0; + ref->node.full_backref = 1; } else { - add_data_backref(extent_cache, key.objectid, key.offset, 0, - 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0); + ref->root = root; + ref->owner = owner; + ref->offset = offset; + ref->node.full_backref = 0; } - return 0; + ref->bytes = max_size; + ref->found_ref = 0; + ref->num_refs = 0; + list_add_tail(&ref->node.list, &rec->backrefs); + if (max_size > rec->max_size) + rec->max_size = max_size; + return ref; } -#endif -struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf, - struct btrfs_key *key, - int slot) +/* Check if the type of extent matches with its chunk */ +static void check_extent_type(struct extent_record *rec) { - struct btrfs_chunk *ptr; - struct chunk_record *rec; - int num_stripes, i; + struct btrfs_block_group_cache *bg_cache; - ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); - num_stripes = btrfs_chunk_num_stripes(leaf, ptr); + bg_cache = btrfs_lookup_first_block_group(global_info, rec->start); + if (!bg_cache) + return; - rec = calloc(1, btrfs_chunk_record_size(num_stripes)); - if (!rec) { - fprintf(stderr, "memory allocation failed\n"); - exit(-1); + /* data extent, check chunk directly*/ + if (!rec->metadata) { + if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA)) + rec->wrong_chunk_type = 1; + return; } - INIT_LIST_HEAD(&rec->list); - INIT_LIST_HEAD(&rec->dextents); - 
rec->bg_rec = NULL; - - rec->cache.start = key->offset; - rec->cache.size = btrfs_chunk_length(leaf, ptr); - - rec->generation = btrfs_header_generation(leaf); + /* metadata extent, check the obvious case first */ + if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM | + BTRFS_BLOCK_GROUP_METADATA))) { + rec->wrong_chunk_type = 1; + return; + } - rec->objectid = key->objectid; - rec->type = key->type; - rec->offset = key->offset; + /* + * Check SYSTEM extent, as it's also marked as metadata, we can only + * make sure it's a SYSTEM extent by its backref + */ + if (!list_empty(&rec->backrefs)) { + struct extent_backref *node; + struct tree_backref *tback; + u64 bg_type; - rec->length = rec->cache.size; - rec->owner = btrfs_chunk_owner(leaf, ptr); - rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr); - rec->type_flags = btrfs_chunk_type(leaf, ptr); - rec->io_width = btrfs_chunk_io_width(leaf, ptr); - rec->io_align = btrfs_chunk_io_align(leaf, ptr); - rec->sector_size = btrfs_chunk_sector_size(leaf, ptr); - rec->num_stripes = num_stripes; - rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr); + node = to_extent_backref(rec->backrefs.next); + if (node->is_data) { + /* tree block shouldn't have data backref */ + rec->wrong_chunk_type = 1; + return; + } + tback = container_of(node, struct tree_backref, node); - for (i = 0; i < rec->num_stripes; ++i) { - rec->stripes[i].devid = - btrfs_stripe_devid_nr(leaf, ptr, i); - rec->stripes[i].offset = - btrfs_stripe_offset_nr(leaf, ptr, i); - read_extent_buffer(leaf, rec->stripes[i].dev_uuid, - (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i), - BTRFS_UUID_SIZE); + if (tback->root == BTRFS_CHUNK_TREE_OBJECTID) + bg_type = BTRFS_BLOCK_GROUP_SYSTEM; + else + bg_type = BTRFS_BLOCK_GROUP_METADATA; + if (!(bg_cache->flags & bg_type)) + rec->wrong_chunk_type = 1; } - - return rec; } -static int process_chunk_item(struct cache_tree *chunk_cache, - struct btrfs_key *key, struct extent_buffer *eb, - int slot) +/* + * Allocate a new extent 
record, fill default values from @tmpl and insert int + * @extent_cache. Caller is supposed to make sure the [start,nr) is not in + * the cache, otherwise it fails. + */ +static int add_extent_rec_nolookup(struct cache_tree *extent_cache, + struct extent_record *tmpl) { - struct chunk_record *rec; + struct extent_record *rec; int ret = 0; - rec = btrfs_new_chunk_record(eb, key, slot); - ret = insert_cache_extent(chunk_cache, &rec->cache); + rec = malloc(sizeof(*rec)); + if (!rec) + return -ENOMEM; + rec->start = tmpl->start; + rec->max_size = tmpl->max_size; + rec->nr = max(tmpl->nr, tmpl->max_size); + rec->found_rec = tmpl->found_rec; + rec->content_checked = tmpl->content_checked; + rec->owner_ref_checked = tmpl->owner_ref_checked; + rec->num_duplicates = 0; + rec->metadata = tmpl->metadata; + rec->flag_block_full_backref = FLAG_UNSET; + rec->bad_full_backref = 0; + rec->crossing_stripes = 0; + rec->wrong_chunk_type = 0; + rec->is_root = tmpl->is_root; + rec->refs = tmpl->refs; + rec->extent_item_refs = tmpl->extent_item_refs; + rec->parent_generation = tmpl->parent_generation; + INIT_LIST_HEAD(&rec->backrefs); + INIT_LIST_HEAD(&rec->dups); + INIT_LIST_HEAD(&rec->list); + memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key)); + rec->cache.start = tmpl->start; + rec->cache.size = tmpl->nr; + ret = insert_cache_extent(extent_cache, &rec->cache); if (ret) { - fprintf(stderr, "Chunk[%llu, %llu] existed.\n", - rec->offset, rec->length); free(rec); + return ret; } + bytes_used += rec->nr; + if (tmpl->metadata) + rec->crossing_stripes = check_crossing_stripes(global_info, + rec->start, global_info->tree_root->nodesize); + check_extent_type(rec); return ret; } -static int process_device_item(struct rb_root *dev_cache, - struct btrfs_key *key, struct extent_buffer *eb, int slot) +/* + * Lookup and modify an extent, some values of @tmpl are interpreted verbatim, + * some are hints: + * - refs - if found, increase refs + * - is_root - if found, set + * - 
content_checked - if found, set + * - owner_ref_checked - if found, set + * + * If not found, create a new one, initialize and insert. + */ +static int add_extent_rec(struct cache_tree *extent_cache, + struct extent_record *tmpl) { - struct btrfs_dev_item *ptr; - struct device_record *rec; + struct extent_record *rec; + struct cache_extent *cache; int ret = 0; + int dup = 0; - ptr = btrfs_item_ptr(eb, - slot, struct btrfs_dev_item); + cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr); + if (cache) { + rec = container_of(cache, struct extent_record, cache); + if (tmpl->refs) + rec->refs++; + if (rec->nr == 1) + rec->nr = max(tmpl->nr, tmpl->max_size); - rec = malloc(sizeof(*rec)); - if (!rec) { - fprintf(stderr, "memory allocation failed\n"); - return -ENOMEM; - } + /* + * We need to make sure to reset nr to whatever the extent + * record says was the real size, this way we can compare it to + * the backrefs. + */ + if (tmpl->found_rec) { + if (tmpl->start != rec->start || rec->found_rec) { + struct extent_record *tmp; - rec->devid = key->offset; - rec->generation = btrfs_header_generation(eb); + dup = 1; + if (list_empty(&rec->list)) + list_add_tail(&rec->list, + &duplicate_extents); - rec->objectid = key->objectid; - rec->type = key->type; - rec->offset = key->offset; + /* + * We have to do this song and dance in case we + * find an extent record that falls inside of + * our current extent record but does not have + * the same objectid. 
+ */ + tmp = malloc(sizeof(*tmp)); + if (!tmp) + return -ENOMEM; + tmp->start = tmpl->start; + tmp->max_size = tmpl->max_size; + tmp->nr = tmpl->nr; + tmp->found_rec = 1; + tmp->metadata = tmpl->metadata; + tmp->extent_item_refs = tmpl->extent_item_refs; + INIT_LIST_HEAD(&tmp->list); + list_add_tail(&tmp->list, &rec->dups); + rec->num_duplicates++; + } else { + rec->nr = tmpl->nr; + rec->found_rec = 1; + } + } - rec->devid = btrfs_device_id(eb, ptr); - rec->total_byte = btrfs_device_total_bytes(eb, ptr); - rec->byte_used = btrfs_device_bytes_used(eb, ptr); + if (tmpl->extent_item_refs && !dup) { + if (rec->extent_item_refs) { + fprintf(stderr, "block %llu rec " + "extent_item_refs %llu, passed %llu\n", + (unsigned long long)tmpl->start, + (unsigned long long) + rec->extent_item_refs, + (unsigned long long)tmpl->extent_item_refs); + } + rec->extent_item_refs = tmpl->extent_item_refs; + } + if (tmpl->is_root) + rec->is_root = 1; + if (tmpl->content_checked) + rec->content_checked = 1; + if (tmpl->owner_ref_checked) + rec->owner_ref_checked = 1; + memcpy(&rec->parent_key, &tmpl->parent_key, + sizeof(tmpl->parent_key)); + if (tmpl->parent_generation) + rec->parent_generation = tmpl->parent_generation; + if (rec->max_size < tmpl->max_size) + rec->max_size = tmpl->max_size; - ret = rb_insert(dev_cache, &rec->node, device_record_compare); - if (ret) { - fprintf(stderr, "Device[%llu] existed.\n", rec->devid); - free(rec); + /* + * A metadata extent can't cross stripe_len boundary, otherwise + * kernel scrub won't be able to handle it. + * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check + * it. 
+ */ + if (tmpl->metadata) + rec->crossing_stripes = check_crossing_stripes( + global_info, rec->start, + global_info->tree_root->nodesize); + check_extent_type(rec); + maybe_free_extent_rec(extent_cache, rec); + return ret; } + ret = add_extent_rec_nolookup(extent_cache, tmpl); + return ret; } -struct block_group_record * -btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key, - int slot) +static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr, + u64 parent, u64 root, int found_ref) { - struct btrfs_block_group_item *ptr; - struct block_group_record *rec; + struct extent_record *rec; + struct tree_backref *back; + struct cache_extent *cache; + int ret; - rec = calloc(1, sizeof(*rec)); - if (!rec) { - fprintf(stderr, "memory allocation failed\n"); - exit(-1); - } + cache = lookup_cache_extent(extent_cache, bytenr, 1); + if (!cache) { + struct extent_record tmpl; - rec->cache.start = key->objectid; - rec->cache.size = key->offset; + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.start = bytenr; + tmpl.nr = 1; + tmpl.metadata = 1; - rec->generation = btrfs_header_generation(leaf); + ret = add_extent_rec_nolookup(extent_cache, &tmpl); + if (ret) + return ret; - rec->objectid = key->objectid; - rec->type = key->type; - rec->offset = key->offset; + /* really a bug in cache_extent implement now */ + cache = lookup_cache_extent(extent_cache, bytenr, 1); + if (!cache) + return -ENOENT; + } - ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item); - rec->flags = btrfs_disk_block_group_flags(leaf, ptr); + rec = container_of(cache, struct extent_record, cache); + if (rec->start != bytenr) { + /* + * Several cause, from unaligned bytenr to over lapping extents + */ + return -EEXIST; + } - INIT_LIST_HEAD(&rec->list); + back = find_tree_backref(rec, parent, root); + if (!back) { + back = alloc_tree_backref(rec, parent, root); + if (!back) + return -ENOMEM; + } - return rec; + if (found_ref) { + if (back->node.found_ref) { + 
fprintf(stderr, "Extent back ref already exists " + "for %llu parent %llu root %llu \n", + (unsigned long long)bytenr, + (unsigned long long)parent, + (unsigned long long)root); + } + back->node.found_ref = 1; + } else { + if (back->node.found_extent_tree) { + fprintf(stderr, "Extent back ref already exists " + "for %llu parent %llu root %llu \n", + (unsigned long long)bytenr, + (unsigned long long)parent, + (unsigned long long)root); + } + back->node.found_extent_tree = 1; + } + check_extent_type(rec); + maybe_free_extent_rec(extent_cache, rec); + return 0; } -static int process_block_group_item(struct block_group_tree *block_group_cache, - struct btrfs_key *key, - struct extent_buffer *eb, int slot) +static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr, + u64 parent, u64 root, u64 owner, u64 offset, + u32 num_refs, int found_ref, u64 max_size) { - struct block_group_record *rec; - int ret = 0; + struct extent_record *rec; + struct data_backref *back; + struct cache_extent *cache; + int ret; - rec = btrfs_new_block_group_record(eb, key, slot); - ret = insert_block_group_record(block_group_cache, rec); - if (ret) { - fprintf(stderr, "Block Group[%llu, %llu] existed.\n", - rec->objectid, rec->offset); - free(rec); - } + cache = lookup_cache_extent(extent_cache, bytenr, 1); + if (!cache) { + struct extent_record tmpl; - return ret; -} + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.start = bytenr; + tmpl.nr = 1; + tmpl.max_size = max_size; -struct device_extent_record * -btrfs_new_device_extent_record(struct extent_buffer *leaf, - struct btrfs_key *key, int slot) -{ - struct device_extent_record *rec; - struct btrfs_dev_extent *ptr; + ret = add_extent_rec_nolookup(extent_cache, &tmpl); + if (ret) + return ret; - rec = calloc(1, sizeof(*rec)); - if (!rec) { - fprintf(stderr, "memory allocation failed\n"); - exit(-1); + cache = lookup_cache_extent(extent_cache, bytenr, 1); + if (!cache) + abort(); } - rec->cache.objectid = key->objectid; - rec->cache.start 
= key->offset; - - rec->generation = btrfs_header_generation(leaf); - - rec->objectid = key->objectid; - rec->type = key->type; - rec->offset = key->offset; + rec = container_of(cache, struct extent_record, cache); + if (rec->max_size < max_size) + rec->max_size = max_size; - ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent); - rec->chunk_objecteid = - btrfs_dev_extent_chunk_objectid(leaf, ptr); - rec->chunk_offset = - btrfs_dev_extent_chunk_offset(leaf, ptr); - rec->length = btrfs_dev_extent_length(leaf, ptr); - rec->cache.size = rec->length; + /* + * If found_ref is set then max_size is the real size and must match the + * existing refs. So if we have already found a ref then we need to + * make sure that this ref matches the existing one, otherwise we need + * to add a new backref so we can notice that the backrefs don't match + * and we need to figure out who is telling the truth. This is to + * account for that awful fsync bug I introduced where we'd end up with + * a btrfs_file_extent_item that would have its length include multiple + * prealloc extents or point inside of a prealloc extent. 
+ */ + back = find_data_backref(rec, parent, root, owner, offset, found_ref, + bytenr, max_size); + if (!back) { + back = alloc_data_backref(rec, parent, root, owner, offset, + max_size); + BUG_ON(!back); + } - INIT_LIST_HEAD(&rec->chunk_list); - INIT_LIST_HEAD(&rec->device_list); + if (found_ref) { + BUG_ON(num_refs != 1); + if (back->node.found_ref) + BUG_ON(back->bytes != max_size); + back->node.found_ref = 1; + back->found_ref += 1; + back->bytes = max_size; + back->disk_bytenr = bytenr; + rec->refs += 1; + rec->content_checked = 1; + rec->owner_ref_checked = 1; + } else { + if (back->node.found_extent_tree) { + fprintf(stderr, "Extent back ref already exists " + "for %llu parent %llu root %llu " + "owner %llu offset %llu num_refs %lu\n", + (unsigned long long)bytenr, + (unsigned long long)parent, + (unsigned long long)root, + (unsigned long long)owner, + (unsigned long long)offset, + (unsigned long)num_refs); + } + back->num_refs = num_refs; + back->node.found_extent_tree = 1; + } + maybe_free_extent_rec(extent_cache, rec); + return 0; +} - return rec; +static int add_pending(struct cache_tree *pending, + struct cache_tree *seen, u64 bytenr, u32 size) +{ + int ret; + ret = add_cache_extent(seen, bytenr, size); + if (ret) + return ret; + add_cache_extent(pending, bytenr, size); + return 0; } -static int -process_device_extent_item(struct device_extent_tree *dev_extent_cache, - struct btrfs_key *key, struct extent_buffer *eb, - int slot) +static int pick_next_pending(struct cache_tree *pending, + struct cache_tree *reada, + struct cache_tree *nodes, + u64 last, struct block_info *bits, int bits_nr, + int *reada_bits) { - struct device_extent_record *rec; + unsigned long node_start = last; + struct cache_extent *cache; int ret; - rec = btrfs_new_device_extent_record(eb, key, slot); - ret = insert_device_extent_record(dev_extent_cache, rec); - if (ret) { - fprintf(stderr, - "Device extent[%llu, %llu, %llu] existed.\n", - rec->objectid, rec->offset, rec->length); - 
free(rec); + cache = search_cache_extent(reada, 0); + if (cache) { + bits[0].start = cache->start; + bits[0].size = cache->size; + *reada_bits = 1; + return 1; + } + *reada_bits = 0; + if (node_start > 32768) + node_start -= 32768; + + cache = search_cache_extent(nodes, node_start); + if (!cache) + cache = search_cache_extent(nodes, 0); + + if (!cache) { + cache = search_cache_extent(pending, 0); + if (!cache) + return 0; + ret = 0; + do { + bits[ret].start = cache->start; + bits[ret].size = cache->size; + cache = next_cache_extent(cache); + ret++; + } while (cache && ret < bits_nr); + return ret; } + ret = 0; + do { + bits[ret].start = cache->start; + bits[ret].size = cache->size; + cache = next_cache_extent(cache); + ret++; + } while (cache && ret < bits_nr); + + if (bits_nr - ret > 8) { + u64 lookup = bits[0].start + bits[0].size; + struct cache_extent *next; + next = search_cache_extent(pending, lookup); + while(next) { + if (next->start - lookup > 32768) + break; + bits[ret].start = next->start; + bits[ret].size = next->size; + lookup = next->start + next->size; + ret++; + if (ret == bits_nr) + break; + next = next_cache_extent(next); + if (!next) + break; + } + } return ret; } -static int process_extent_item(struct btrfs_root *root, - struct cache_tree *extent_cache, - struct extent_buffer *eb, int slot) +static void free_chunk_record(struct cache_extent *cache) { - struct btrfs_extent_item *ei; - struct btrfs_extent_inline_ref *iref; - struct btrfs_extent_data_ref *dref; - struct btrfs_shared_data_ref *sref; - struct btrfs_key key; - unsigned long end; - unsigned long ptr; - int type; - u32 item_size = btrfs_item_size_nr(eb, slot); - u64 refs = 0; - u64 offset; - u64 num_bytes; - int metadata = 0; + struct chunk_record *rec; - btrfs_item_key_to_cpu(eb, &key, slot); + rec = container_of(cache, struct chunk_record, cache); + list_del_init(&rec->list); + list_del_init(&rec->dextents); + free(rec); +} - if (key.type == BTRFS_METADATA_ITEM_KEY) { - metadata = 1; 
- num_bytes = root->nodesize; - } else { - num_bytes = key.offset; - } +void free_chunk_cache_tree(struct cache_tree *chunk_cache) +{ + cache_tree_free_extents(chunk_cache, free_chunk_record); +} - if (item_size < sizeof(*ei)) { -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 - struct btrfs_extent_item_v0 *ei0; - BUG_ON(item_size != sizeof(*ei0)); - ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0); - refs = btrfs_extent_refs_v0(eb, ei0); -#else - BUG(); -#endif - return add_extent_rec(extent_cache, NULL, 0, key.objectid, - num_bytes, refs, 0, 0, 0, metadata, 1, - num_bytes); - } +static void free_device_record(struct rb_node *node) +{ + struct device_record *rec; - ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item); - refs = btrfs_extent_refs(eb, ei); - if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK) - metadata = 1; - else - metadata = 0; + rec = container_of(node, struct device_record, node); + free(rec); +} - add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes, - refs, 0, 0, 0, metadata, 1, num_bytes); +FREE_RB_BASED_TREE(device_cache, free_device_record); - ptr = (unsigned long)(ei + 1); - if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK && - key.type == BTRFS_EXTENT_ITEM_KEY) - ptr += sizeof(struct btrfs_tree_block_info); +int insert_block_group_record(struct block_group_tree *tree, + struct block_group_record *bg_rec) +{ + int ret; - end = (unsigned long)ei + item_size; - while (ptr < end) { - iref = (struct btrfs_extent_inline_ref *)ptr; - type = btrfs_extent_inline_ref_type(eb, iref); - offset = btrfs_extent_inline_ref_offset(eb, iref); - switch (type) { - case BTRFS_TREE_BLOCK_REF_KEY: - add_tree_backref(extent_cache, key.objectid, - 0, offset, 0); - break; - case BTRFS_SHARED_BLOCK_REF_KEY: - add_tree_backref(extent_cache, key.objectid, - offset, 0, 0); - break; - case BTRFS_EXTENT_DATA_REF_KEY: - dref = (struct btrfs_extent_data_ref *)(&iref->offset); - add_data_backref(extent_cache, key.objectid, 0, - 
btrfs_extent_data_ref_root(eb, dref), - btrfs_extent_data_ref_objectid(eb, - dref), - btrfs_extent_data_ref_offset(eb, dref), - btrfs_extent_data_ref_count(eb, dref), - 0, num_bytes); - break; - case BTRFS_SHARED_DATA_REF_KEY: - sref = (struct btrfs_shared_data_ref *)(iref + 1); - add_data_backref(extent_cache, key.objectid, offset, - 0, 0, 0, - btrfs_shared_data_ref_count(eb, sref), - 0, num_bytes); - break; - default: - fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n", - key.objectid, key.type, num_bytes); - goto out; - } - ptr += btrfs_extent_inline_ref_size(type); - } - WARN_ON(ptr > end); -out: + ret = insert_cache_extent(&tree->tree, &bg_rec->cache); + if (ret) + return ret; + + list_add_tail(&bg_rec->list, &tree->block_groups); return 0; } -static int check_cache_range(struct btrfs_root *root, - struct btrfs_block_group_cache *cache, - u64 offset, u64 bytes) +static void free_block_group_record(struct cache_extent *cache) { - struct btrfs_free_space *entry; - u64 *logical; - u64 bytenr; - int stripe_len; - int i, nr, ret; - - for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { - bytenr = btrfs_sb_offset(i); - ret = btrfs_rmap_block(&root->fs_info->mapping_tree, - cache->key.objectid, bytenr, 0, - &logical, &nr, &stripe_len); - if (ret) - return ret; + struct block_group_record *rec; - while (nr--) { - if (logical[nr] + stripe_len <= offset) - continue; - if (offset + bytes <= logical[nr]) - continue; - if (logical[nr] == offset) { - if (stripe_len >= bytes) { - kfree(logical); - return 0; - } - bytes -= stripe_len; - offset += stripe_len; - } else if (logical[nr] < offset) { - if (logical[nr] + stripe_len >= - offset + bytes) { - kfree(logical); - return 0; - } - bytes = (offset + bytes) - - (logical[nr] + stripe_len); - offset = logical[nr] + stripe_len; - } else { - /* - * Could be tricky, the super may land in the - * middle of the area we're checking. First - * check the easiest case, it's at the end. 
- */ - if (logical[nr] + stripe_len >= - bytes + offset) { - bytes = logical[nr] - offset; - continue; - } + rec = container_of(cache, struct block_group_record, cache); + list_del_init(&rec->list); + free(rec); +} - /* Check the left side */ - ret = check_cache_range(root, cache, - offset, - logical[nr] - offset); - if (ret) { - kfree(logical); - return ret; - } +void free_block_group_tree(struct block_group_tree *tree) +{ + cache_tree_free_extents(&tree->tree, free_block_group_record); +} - /* Now we continue with the right side */ - bytes = (offset + bytes) - - (logical[nr] + stripe_len); - offset = logical[nr] + stripe_len; - } - } +int insert_device_extent_record(struct device_extent_tree *tree, + struct device_extent_record *de_rec) +{ + int ret; - kfree(logical); - } + /* + * Device extent is a bit different from the other extents, because + * the extents which belong to the different devices may have the + * same start and size, so we need use the special extent cache + * search/insert functions. 
+ */ + ret = insert_cache_extent2(&tree->tree, &de_rec->cache); + if (ret) + return ret; - entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes); - if (!entry) { - fprintf(stderr, "There is no free space entry for %Lu-%Lu\n", - offset, offset+bytes); - return -EINVAL; - } + list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans); + list_add_tail(&de_rec->device_list, &tree->no_device_orphans); + return 0; +} - if (entry->offset != offset) { - fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset, - entry->offset); - return -EINVAL; - } +static void free_device_extent_record(struct cache_extent *cache) +{ + struct device_extent_record *rec; - if (entry->bytes != bytes) { - fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n", - bytes, entry->bytes, offset); - return -EINVAL; - } + rec = container_of(cache, struct device_extent_record, cache); + if (!list_empty(&rec->chunk_list)) + list_del_init(&rec->chunk_list); + if (!list_empty(&rec->device_list)) + list_del_init(&rec->device_list); + free(rec); +} - unlink_free_space(cache->free_space_ctl, entry); - free(entry); - return 0; +void free_device_extent_tree(struct device_extent_tree *tree) +{ + cache_tree_free_extents(&tree->tree, free_device_extent_record); } -static int verify_space_cache(struct btrfs_root *root, - struct btrfs_block_group_cache *cache) +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 +static int process_extent_ref_v0(struct cache_tree *extent_cache, + struct extent_buffer *leaf, int slot) { - struct btrfs_path *path; - struct extent_buffer *leaf; + struct btrfs_extent_ref_v0 *ref0; struct btrfs_key key; - u64 last; - int ret = 0; + int ret; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + btrfs_item_key_to_cpu(leaf, &key, slot); + ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0); + if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) { + ret = add_tree_backref(extent_cache, key.objectid, key.offset, + 0, 0); + } else { + ret = 
add_data_backref(extent_cache, key.objectid, key.offset, + 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0); + } + return ret; +} +#endif - root = root->fs_info->extent_root; +struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf, + struct btrfs_key *key, + int slot) +{ + struct btrfs_chunk *ptr; + struct chunk_record *rec; + int num_stripes, i; - last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET); + ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); + num_stripes = btrfs_chunk_num_stripes(leaf, ptr); - key.objectid = last; - key.offset = 0; - key.type = BTRFS_EXTENT_ITEM_KEY; + rec = calloc(1, btrfs_chunk_record_size(num_stripes)); + if (!rec) { + fprintf(stderr, "memory allocation failed\n"); + exit(-1); + } - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) - goto out; - ret = 0; - while (1) { - if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { - ret = btrfs_next_leaf(root, path); - if (ret < 0) - goto out; - if (ret > 0) { - ret = 0; - break; - } - } - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - if (key.objectid >= cache->key.offset + cache->key.objectid) - break; - if (key.type != BTRFS_EXTENT_ITEM_KEY && - key.type != BTRFS_METADATA_ITEM_KEY) { - path->slots[0]++; - continue; - } + INIT_LIST_HEAD(&rec->list); + INIT_LIST_HEAD(&rec->dextents); + rec->bg_rec = NULL; - if (last == key.objectid) { - if (key.type == BTRFS_EXTENT_ITEM_KEY) - last = key.objectid + key.offset; - else - last = key.objectid + root->nodesize; - path->slots[0]++; - continue; - } + rec->cache.start = key->offset; + rec->cache.size = btrfs_chunk_length(leaf, ptr); - ret = check_cache_range(root, cache, last, - key.objectid - last); - if (ret) - break; - if (key.type == BTRFS_EXTENT_ITEM_KEY) - last = key.objectid + key.offset; - else - last = key.objectid + root->nodesize; - path->slots[0]++; - } + rec->generation = btrfs_header_generation(leaf); - if (last < cache->key.objectid + 
cache->key.offset) - ret = check_cache_range(root, cache, last, - cache->key.objectid + - cache->key.offset - last); + rec->objectid = key->objectid; + rec->type = key->type; + rec->offset = key->offset; -out: - btrfs_free_path(path); + rec->length = rec->cache.size; + rec->owner = btrfs_chunk_owner(leaf, ptr); + rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr); + rec->type_flags = btrfs_chunk_type(leaf, ptr); + rec->io_width = btrfs_chunk_io_width(leaf, ptr); + rec->io_align = btrfs_chunk_io_align(leaf, ptr); + rec->sector_size = btrfs_chunk_sector_size(leaf, ptr); + rec->num_stripes = num_stripes; + rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr); - if (!ret && - !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) { - fprintf(stderr, "There are still entries left in the space " - "cache\n"); - ret = -EINVAL; + for (i = 0; i < rec->num_stripes; ++i) { + rec->stripes[i].devid = + btrfs_stripe_devid_nr(leaf, ptr, i); + rec->stripes[i].offset = + btrfs_stripe_offset_nr(leaf, ptr, i); + read_extent_buffer(leaf, rec->stripes[i].dev_uuid, + (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i), + BTRFS_UUID_SIZE); } - return ret; + return rec; } -static int check_space_cache(struct btrfs_root *root) +static int process_chunk_item(struct cache_tree *chunk_cache, + struct btrfs_key *key, struct extent_buffer *eb, + int slot) { - struct btrfs_block_group_cache *cache; - u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE; - int ret; - int error = 0; + struct chunk_record *rec; + struct btrfs_chunk *chunk; + int ret = 0; - if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL && - btrfs_super_generation(root->fs_info->super_copy) != - btrfs_super_cache_generation(root->fs_info->super_copy)) { - printf("cache and super generation don't match, space cache " - "will be invalidated\n"); + chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk); + /* + * Do extra check for this chunk item, + * + * It's still possible one can craft a leaf with 
CHUNK_ITEM, with + * wrong onwer(3) out of chunk tree, to pass both chunk tree check + * and owner<->key_type check. + */ + ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot, + key->offset); + if (ret < 0) { + error("chunk(%llu, %llu) is not valid, ignore it", + key->offset, btrfs_chunk_length(eb, chunk)); return 0; } - - if (ctx.progress_enabled) { - ctx.tp = TASK_FREE_SPACE; - task_start(ctx.info); + rec = btrfs_new_chunk_record(eb, key, slot); + ret = insert_cache_extent(chunk_cache, &rec->cache); + if (ret) { + fprintf(stderr, "Chunk[%llu, %llu] existed.\n", + rec->offset, rec->length); + free(rec); } - while (1) { - cache = btrfs_lookup_first_block_group(root->fs_info, start); - if (!cache) - break; + return ret; +} - start = cache->key.objectid + cache->key.offset; - if (!cache->free_space_ctl) { - if (btrfs_init_free_space_ctl(cache, - root->sectorsize)) { - ret = -ENOMEM; - break; - } - } else { - btrfs_remove_free_space_cache(cache); - } +static int process_device_item(struct rb_root *dev_cache, + struct btrfs_key *key, struct extent_buffer *eb, int slot) +{ + struct btrfs_dev_item *ptr; + struct device_record *rec; + int ret = 0; - if (btrfs_fs_compat_ro(root->fs_info, - BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) { - ret = exclude_super_stripes(root, cache); - if (ret) { - fprintf(stderr, "could not exclude super stripes: %s\n", - strerror(-ret)); - error++; - continue; - } - ret = load_free_space_tree(root->fs_info, cache); - free_excluded_extents(root, cache); - if (ret < 0) { - fprintf(stderr, "could not load free space tree: %s\n", - strerror(-ret)); - error++; - continue; - } - error += ret; - } else { - ret = load_free_space_cache(root->fs_info, cache); - if (!ret) - continue; - } + ptr = btrfs_item_ptr(eb, + slot, struct btrfs_dev_item); - ret = verify_space_cache(root, cache); - if (ret) { - fprintf(stderr, "cache appears valid but isnt %Lu\n", - cache->key.objectid); - error++; - } + rec = malloc(sizeof(*rec)); + if (!rec) { + 
fprintf(stderr, "memory allocation failed\n"); + return -ENOMEM; } - task_stop(ctx.info); + rec->devid = key->offset; + rec->generation = btrfs_header_generation(eb); - return error ? -EINVAL : 0; + rec->objectid = key->objectid; + rec->type = key->type; + rec->offset = key->offset; + + rec->devid = btrfs_device_id(eb, ptr); + rec->total_byte = btrfs_device_total_bytes(eb, ptr); + rec->byte_used = btrfs_device_bytes_used(eb, ptr); + + ret = rb_insert(dev_cache, &rec->node, device_record_compare); + if (ret) { + fprintf(stderr, "Device[%llu] existed.\n", rec->devid); + free(rec); + } + + return ret; } -static int check_extent_csums(struct btrfs_root *root, u64 bytenr, - u64 num_bytes, unsigned long leaf_offset, - struct extent_buffer *eb) { +struct block_group_record * +btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key, + int slot) +{ + struct btrfs_block_group_item *ptr; + struct block_group_record *rec; - u64 offset = 0; - u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); - char *data; - unsigned long csum_offset; - u32 csum; - u32 csum_expected; - u64 read_len; - u64 data_checked = 0; - u64 tmp; - int ret = 0; - int mirror; - int num_copies; + rec = calloc(1, sizeof(*rec)); + if (!rec) { + fprintf(stderr, "memory allocation failed\n"); + exit(-1); + } - if (num_bytes % root->sectorsize) - return -EINVAL; + rec->cache.start = key->objectid; + rec->cache.size = key->offset; - data = malloc(num_bytes); - if (!data) - return -ENOMEM; + rec->generation = btrfs_header_generation(leaf); - while (offset < num_bytes) { - mirror = 0; -again: - read_len = num_bytes - offset; - /* read as much space once a time */ - ret = read_extent_data(root, data + offset, - bytenr + offset, &read_len, mirror); - if (ret) - goto out; - data_checked = 0; - /* verify every 4k data's checksum */ - while (data_checked < read_len) { - csum = ~(u32)0; - tmp = offset + data_checked; + rec->objectid = key->objectid; + rec->type = key->type; + 
rec->offset = key->offset; - csum = btrfs_csum_data(NULL, (char *)data + tmp, - csum, root->sectorsize); - btrfs_csum_final(csum, (char *)&csum); + ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item); + rec->flags = btrfs_disk_block_group_flags(leaf, ptr); - csum_offset = leaf_offset + - tmp / root->sectorsize * csum_size; - read_extent_buffer(eb, (char *)&csum_expected, - csum_offset, csum_size); - /* try another mirror */ - if (csum != csum_expected) { - fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n", - mirror, bytenr + tmp, - csum, csum_expected); - num_copies = btrfs_num_copies( - &root->fs_info->mapping_tree, - bytenr, num_bytes); - if (mirror < num_copies - 1) { - mirror += 1; - goto again; - } - } - data_checked += root->sectorsize; - } - offset += read_len; + INIT_LIST_HEAD(&rec->list); + + return rec; +} + +static int process_block_group_item(struct block_group_tree *block_group_cache, + struct btrfs_key *key, + struct extent_buffer *eb, int slot) +{ + struct block_group_record *rec; + int ret = 0; + + rec = btrfs_new_block_group_record(eb, key, slot); + ret = insert_block_group_record(block_group_cache, rec); + if (ret) { + fprintf(stderr, "Block Group[%llu, %llu] existed.\n", + rec->objectid, rec->offset); + free(rec); } -out: - free(data); + return ret; } -static int check_extent_exists(struct btrfs_root *root, u64 bytenr, - u64 num_bytes) +struct device_extent_record * +btrfs_new_device_extent_record(struct extent_buffer *leaf, + struct btrfs_key *key, int slot) { - struct btrfs_path *path; - struct extent_buffer *leaf; - struct btrfs_key key; - int ret; + struct device_extent_record *rec; + struct btrfs_dev_extent *ptr; - path = btrfs_alloc_path(); - if (!path) { - fprintf(stderr, "Error allocing path\n"); - return -ENOMEM; + rec = calloc(1, sizeof(*rec)); + if (!rec) { + fprintf(stderr, "memory allocation failed\n"); + exit(-1); } - key.objectid = bytenr; - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = (u64)-1; + 
rec->cache.objectid = key->objectid; + rec->cache.start = key->offset; -again: - ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, - 0, 0); - if (ret < 0) { - fprintf(stderr, "Error looking up extent record %d\n", ret); - btrfs_free_path(path); - return ret; - } else if (ret) { - if (path->slots[0] > 0) { - path->slots[0]--; - } else { - ret = btrfs_prev_leaf(root, path); - if (ret < 0) { - goto out; - } else if (ret > 0) { - ret = 0; - goto out; - } - } - } + rec->generation = btrfs_header_generation(leaf); - btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + rec->objectid = key->objectid; + rec->type = key->type; + rec->offset = key->offset; - /* - * Block group items come before extent items if they have the same - * bytenr, so walk back one more just in case. Dear future traveler, - * first congrats on mastering time travel. Now if it's not too much - * trouble could you go back to 2006 and tell Chris to make the - * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the - * EXTENT_ITEM_KEY please? 
- */ - while (key.type > BTRFS_EXTENT_ITEM_KEY) { - if (path->slots[0] > 0) { - path->slots[0]--; - } else { - ret = btrfs_prev_leaf(root, path); - if (ret < 0) { - goto out; - } else if (ret > 0) { - ret = 0; - goto out; - } - } - btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); - } + ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent); + rec->chunk_objecteid = + btrfs_dev_extent_chunk_objectid(leaf, ptr); + rec->chunk_offset = + btrfs_dev_extent_chunk_offset(leaf, ptr); + rec->length = btrfs_dev_extent_length(leaf, ptr); + rec->cache.size = rec->length; - while (num_bytes) { - if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { - ret = btrfs_next_leaf(root, path); - if (ret < 0) { - fprintf(stderr, "Error going to next leaf " - "%d\n", ret); - btrfs_free_path(path); - return ret; - } else if (ret) { - break; - } - } - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - if (key.type != BTRFS_EXTENT_ITEM_KEY) { - path->slots[0]++; - continue; - } - if (key.objectid + key.offset < bytenr) { - path->slots[0]++; - continue; - } - if (key.objectid > bytenr + num_bytes) - break; + INIT_LIST_HEAD(&rec->chunk_list); + INIT_LIST_HEAD(&rec->device_list); - if (key.objectid == bytenr) { - if (key.offset >= num_bytes) { - num_bytes = 0; - break; - } - num_bytes -= key.offset; - bytenr += key.offset; - } else if (key.objectid < bytenr) { - if (key.objectid + key.offset >= bytenr + num_bytes) { - num_bytes = 0; - break; - } - num_bytes = (bytenr + num_bytes) - - (key.objectid + key.offset); - bytenr = key.objectid + key.offset; - } else { - if (key.objectid + key.offset < bytenr + num_bytes) { - u64 new_start = key.objectid + key.offset; - u64 new_bytes = bytenr + num_bytes - new_start; + return rec; +} - /* - * Weird case, the extent is in the middle of - * our range, we'll have to search one side - * and then the other. Not sure if this happens - * in real life, but no harm in coding it up - * anyway just in case. 
- */ - btrfs_release_path(path); - ret = check_extent_exists(root, new_start, - new_bytes); - if (ret) { - fprintf(stderr, "Right section didn't " - "have a record\n"); - break; - } - num_bytes = key.objectid - bytenr; - goto again; - } - num_bytes = key.objectid - bytenr; - } - path->slots[0]++; - } - ret = 0; +static int +process_device_extent_item(struct device_extent_tree *dev_extent_cache, + struct btrfs_key *key, struct extent_buffer *eb, + int slot) +{ + struct device_extent_record *rec; + int ret; -out: - if (num_bytes && !ret) { - fprintf(stderr, "There are no extents for csum range " - "%Lu-%Lu\n", bytenr, bytenr+num_bytes); - ret = 1; + rec = btrfs_new_device_extent_record(eb, key, slot); + ret = insert_device_extent_record(dev_extent_cache, rec); + if (ret) { + fprintf(stderr, + "Device extent[%llu, %llu, %llu] existed.\n", + rec->objectid, rec->offset, rec->length); + free(rec); } - btrfs_free_path(path); return ret; } -static int check_csums(struct btrfs_root *root) +static int process_extent_item(struct btrfs_root *root, + struct cache_tree *extent_cache, + struct extent_buffer *eb, int slot) { - struct btrfs_path *path; - struct extent_buffer *leaf; + struct btrfs_extent_item *ei; + struct btrfs_extent_inline_ref *iref; + struct btrfs_extent_data_ref *dref; + struct btrfs_shared_data_ref *sref; struct btrfs_key key; - u64 offset = 0, num_bytes = 0; - u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); - int errors = 0; + struct extent_record tmpl; + unsigned long end; + unsigned long ptr; int ret; - u64 data_len; - unsigned long leaf_offset; + int type; + u32 item_size = btrfs_item_size_nr(eb, slot); + u64 refs = 0; + u64 offset; + u64 num_bytes; + int metadata = 0; - root = root->fs_info->csum_root; - if (!extent_buffer_uptodate(root->node)) { - fprintf(stderr, "No valid csum tree found\n"); - return -ENOENT; + btrfs_item_key_to_cpu(eb, &key, slot); + + if (key.type == BTRFS_METADATA_ITEM_KEY) { + metadata = 1; + num_bytes = 
root->nodesize; + } else { + num_bytes = key.offset; } - key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; - key.type = BTRFS_EXTENT_CSUM_KEY; - key.offset = 0; + if (!IS_ALIGNED(key.objectid, root->sectorsize)) { + error("ignoring invalid extent, bytenr %llu is not aligned to %u", + key.objectid, root->sectorsize); + return -EIO; + } + if (item_size < sizeof(*ei)) { +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + struct btrfs_extent_item_v0 *ei0; + BUG_ON(item_size != sizeof(*ei0)); + ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0); + refs = btrfs_extent_refs_v0(eb, ei0); +#else + BUG(); +#endif + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.start = key.objectid; + tmpl.nr = num_bytes; + tmpl.extent_item_refs = refs; + tmpl.metadata = metadata; + tmpl.found_rec = 1; + tmpl.max_size = num_bytes; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + return add_extent_rec(extent_cache, &tmpl); + } - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) { - fprintf(stderr, "Error searching csum tree %d\n", ret); - btrfs_free_path(path); - return ret; + ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item); + refs = btrfs_extent_refs(eb, ei); + if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK) + metadata = 1; + else + metadata = 0; + if (metadata && num_bytes != root->nodesize) { + error("ignore invalid metadata extent, length %llu does not equal to %u", + num_bytes, root->nodesize); + return -EIO; + } + if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) { + error("ignore invalid data extent, length %llu is not aligned to %u", + num_bytes, root->sectorsize); + return -EIO; } - if (ret > 0 && path->slots[0]) - path->slots[0]--; - ret = 0; + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.start = key.objectid; + tmpl.nr = num_bytes; + tmpl.extent_item_refs = refs; + tmpl.metadata = metadata; + tmpl.found_rec = 1; + tmpl.max_size = num_bytes; + add_extent_rec(extent_cache, &tmpl); - while (1) { - if (path->slots[0] >= 
btrfs_header_nritems(path->nodes[0])) { - ret = btrfs_next_leaf(root, path); - if (ret < 0) { - fprintf(stderr, "Error going to next leaf " - "%d\n", ret); - break; - } - if (ret) - break; - } - leaf = path->nodes[0]; + ptr = (unsigned long)(ei + 1); + if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK && + key.type == BTRFS_EXTENT_ITEM_KEY) + ptr += sizeof(struct btrfs_tree_block_info); - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - if (key.type != BTRFS_EXTENT_CSUM_KEY) { - path->slots[0]++; - continue; + end = (unsigned long)ei + item_size; + while (ptr < end) { + iref = (struct btrfs_extent_inline_ref *)ptr; + type = btrfs_extent_inline_ref_type(eb, iref); + offset = btrfs_extent_inline_ref_offset(eb, iref); + switch (type) { + case BTRFS_TREE_BLOCK_REF_KEY: + ret = add_tree_backref(extent_cache, key.objectid, + 0, offset, 0); + if (ret < 0) + error("add_tree_backref failed: %s", + strerror(-ret)); + break; + case BTRFS_SHARED_BLOCK_REF_KEY: + ret = add_tree_backref(extent_cache, key.objectid, + offset, 0, 0); + if (ret < 0) + error("add_tree_backref failed: %s", + strerror(-ret)); + break; + case BTRFS_EXTENT_DATA_REF_KEY: + dref = (struct btrfs_extent_data_ref *)(&iref->offset); + add_data_backref(extent_cache, key.objectid, 0, + btrfs_extent_data_ref_root(eb, dref), + btrfs_extent_data_ref_objectid(eb, + dref), + btrfs_extent_data_ref_offset(eb, dref), + btrfs_extent_data_ref_count(eb, dref), + 0, num_bytes); + break; + case BTRFS_SHARED_DATA_REF_KEY: + sref = (struct btrfs_shared_data_ref *)(iref + 1); + add_data_backref(extent_cache, key.objectid, offset, + 0, 0, 0, + btrfs_shared_data_ref_count(eb, sref), + 0, num_bytes); + break; + default: + fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n", + key.objectid, key.type, num_bytes); + goto out; } + ptr += btrfs_extent_inline_ref_size(type); + } + WARN_ON(ptr > end); +out: + return 0; +} - data_len = (btrfs_item_size_nr(leaf, path->slots[0]) / - csum_size) * root->sectorsize; - if 
(!check_data_csum) - goto skip_csum_check; - leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]); - ret = check_extent_csums(root, key.offset, data_len, - leaf_offset, leaf); +static int check_cache_range(struct btrfs_root *root, + struct btrfs_block_group_cache *cache, + u64 offset, u64 bytes) +{ + struct btrfs_free_space *entry; + u64 *logical; + u64 bytenr; + int stripe_len; + int i, nr, ret; + + for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { + bytenr = btrfs_sb_offset(i); + ret = btrfs_rmap_block(&root->fs_info->mapping_tree, + cache->key.objectid, bytenr, 0, + &logical, &nr, &stripe_len); if (ret) - break; -skip_csum_check: - if (!num_bytes) { - offset = key.offset; - } else if (key.offset != offset + num_bytes) { - ret = check_extent_exists(root, offset, num_bytes); - if (ret) { - fprintf(stderr, "Csum exists for %Lu-%Lu but " - "there is no extent record\n", - offset, offset+num_bytes); - errors++; - } - offset = key.offset; - num_bytes = 0; - } - num_bytes += data_len; - path->slots[0]++; - } + return ret; - btrfs_free_path(path); - return errors; -} + while (nr--) { + if (logical[nr] + stripe_len <= offset) + continue; + if (offset + bytes <= logical[nr]) + continue; + if (logical[nr] == offset) { + if (stripe_len >= bytes) { + free(logical); + return 0; + } + bytes -= stripe_len; + offset += stripe_len; + } else if (logical[nr] < offset) { + if (logical[nr] + stripe_len >= + offset + bytes) { + free(logical); + return 0; + } + bytes = (offset + bytes) - + (logical[nr] + stripe_len); + offset = logical[nr] + stripe_len; + } else { + /* + * Could be tricky, the super may land in the + * middle of the area we're checking. First + * check the easiest case, it's at the end. 
+ */ + if (logical[nr] + stripe_len >= + bytes + offset) { + bytes = logical[nr] - offset; + continue; + } -static int is_dropped_key(struct btrfs_key *key, - struct btrfs_key *drop_key) { - if (key->objectid < drop_key->objectid) - return 1; - else if (key->objectid == drop_key->objectid) { - if (key->type < drop_key->type) - return 1; - else if (key->type == drop_key->type) { - if (key->offset < drop_key->offset) - return 1; - } - } - return 0; -} + /* Check the left side */ + ret = check_cache_range(root, cache, + offset, + logical[nr] - offset); + if (ret) { + free(logical); + return ret; + } -/* - * Here are the rules for FULL_BACKREF. - * - * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set. - * 2) If btrfs_header_owner(buf) no longer points to buf then we have - * FULL_BACKREF set. - * 3) We cow'ed the block walking down a reloc tree. This is impossible to tell - * if it happened after the relocation occurred since we'll have dropped the - * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and - * have no real way to know for sure. - * - * We process the blocks one root at a time, and we start from the lowest root - * objectid and go to the highest. So we can just lookup the owner backref for - * the record and if we don't find it then we know it doesn't exist and we have - * a FULL BACKREF. - * - * FIXME: if we ever start reclaiming root objectid's then we need to fix this - * assumption and simply indicate that we _think_ that the FULL BACKREF needs to - * be set or not and then we can check later once we've gathered all the refs. 
- */ -static int calc_extent_flag(struct btrfs_root *root, - struct cache_tree *extent_cache, - struct extent_buffer *buf, - struct root_item_record *ri, - u64 *flags) -{ - struct extent_record *rec; - struct cache_extent *cache; - struct tree_backref *tback; - u64 owner = 0; + /* Now we continue with the right side */ + bytes = (offset + bytes) - + (logical[nr] + stripe_len); + offset = logical[nr] + stripe_len; + } + } - cache = lookup_cache_extent(extent_cache, buf->start, 1); - /* we have added this extent before */ - BUG_ON(!cache); - rec = container_of(cache, struct extent_record, cache); + free(logical); + } - /* - * Except file/reloc tree, we can not have - * FULL BACKREF MODE - */ - if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID) - goto normal; - /* - * root node - */ - if (buf->start == ri->bytenr) - goto normal; + entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes); + if (!entry) { + fprintf(stderr, "There is no free space entry for %Lu-%Lu\n", + offset, offset+bytes); + return -EINVAL; + } - if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) - goto full_backref; + if (entry->offset != offset) { + fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset, + entry->offset); + return -EINVAL; + } - owner = btrfs_header_owner(buf); - if (owner == ri->objectid) - goto normal; + if (entry->bytes != bytes) { + fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n", + bytes, entry->bytes, offset); + return -EINVAL; + } - tback = find_tree_backref(rec, 0, owner); - if (!tback) - goto full_backref; -normal: - *flags = 0; - if (rec->flag_block_full_backref != -1 && - rec->flag_block_full_backref != 0) - rec->bad_full_backref = 1; - return 0; -full_backref: - *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; - if (rec->flag_block_full_backref != -1 && - rec->flag_block_full_backref != 1) - rec->bad_full_backref = 1; + unlink_free_space(cache->free_space_ctl, entry); + free(entry); return 0; } -static int run_next_block(struct btrfs_root *root, - struct 
block_info *bits, - int bits_nr, - u64 *last, - struct cache_tree *pending, - struct cache_tree *seen, - struct cache_tree *reada, - struct cache_tree *nodes, - struct cache_tree *extent_cache, - struct cache_tree *chunk_cache, - struct rb_root *dev_cache, - struct block_group_tree *block_group_cache, - struct device_extent_tree *dev_extent_cache, - struct root_item_record *ri) +static int verify_space_cache(struct btrfs_root *root, + struct btrfs_block_group_cache *cache) { - struct extent_buffer *buf; - struct extent_record *rec = NULL; - u64 bytenr; - u32 size; - u64 parent; - u64 owner; - u64 flags; - u64 ptr; - u64 gen = 0; - int ret = 0; - int i; - int nritems; + struct btrfs_path path; + struct extent_buffer *leaf; struct btrfs_key key; - struct cache_extent *cache; - int reada_bits; + u64 last; + int ret = 0; - nritems = pick_next_pending(pending, reada, nodes, *last, bits, - bits_nr, &reada_bits); - if (nritems == 0) - return 1; + root = root->fs_info->extent_root; - if (!reada_bits) { - for(i = 0; i < nritems; i++) { - ret = add_cache_extent(reada, bits[i].start, - bits[i].size); - if (ret == -EEXIST) - continue; + last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET); - /* fixme, get the parent transid */ - readahead_tree_block(root, bits[i].start, - bits[i].size, 0); + btrfs_init_path(&path); + key.objectid = last; + key.offset = 0; + key.type = BTRFS_EXTENT_ITEM_KEY; + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret < 0) + goto out; + ret = 0; + while (1) { + if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) { + ret = btrfs_next_leaf(root, &path); + if (ret < 0) + goto out; + if (ret > 0) { + ret = 0; + break; + } + } + leaf = path.nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); + if (key.objectid >= cache->key.offset + cache->key.objectid) + break; + if (key.type != BTRFS_EXTENT_ITEM_KEY && + key.type != BTRFS_METADATA_ITEM_KEY) { + path.slots[0]++; + continue; } - } - *last = bits[0].start; - bytenr = 
bits[0].start; - size = bits[0].size; - cache = lookup_cache_extent(pending, bytenr, size); - if (cache) { - remove_cache_extent(pending, cache); - free(cache); - } - cache = lookup_cache_extent(reada, bytenr, size); - if (cache) { - remove_cache_extent(reada, cache); - free(cache); - } - cache = lookup_cache_extent(nodes, bytenr, size); - if (cache) { - remove_cache_extent(nodes, cache); - free(cache); - } - cache = lookup_cache_extent(extent_cache, bytenr, size); - if (cache) { - rec = container_of(cache, struct extent_record, cache); - gen = rec->parent_generation; - } + if (last == key.objectid) { + if (key.type == BTRFS_EXTENT_ITEM_KEY) + last = key.objectid + key.offset; + else + last = key.objectid + root->nodesize; + path.slots[0]++; + continue; + } - /* fixme, get the real parent transid */ - buf = read_tree_block(root, bytenr, size, gen); - if (!extent_buffer_uptodate(buf)) { - record_bad_block_io(root->fs_info, - extent_cache, bytenr, size); - goto out; + ret = check_cache_range(root, cache, last, + key.objectid - last); + if (ret) + break; + if (key.type == BTRFS_EXTENT_ITEM_KEY) + last = key.objectid + key.offset; + else + last = key.objectid + root->nodesize; + path.slots[0]++; } - nritems = btrfs_header_nritems(buf); + if (last < cache->key.objectid + cache->key.offset) + ret = check_cache_range(root, cache, last, + cache->key.objectid + + cache->key.offset - last); - flags = 0; - if (!init_extent_tree) { - ret = btrfs_lookup_extent_info(NULL, root, bytenr, - btrfs_header_level(buf), 1, NULL, - &flags); - if (ret < 0) { - ret = calc_extent_flag(root, extent_cache, buf, ri, &flags); - if (ret < 0) { - fprintf(stderr, "Couldn't calc extent flags\n"); - flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; - } - } - } else { - flags = 0; - ret = calc_extent_flag(root, extent_cache, buf, ri, &flags); - if (ret < 0) { - fprintf(stderr, "Couldn't calc extent flags\n"); - flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; - } +out: + btrfs_release_path(&path); + + if (!ret && + 
!RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) { + fprintf(stderr, "There are still entries left in the space " + "cache\n"); + ret = -EINVAL; } - if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { - if (ri != NULL && - ri->objectid != BTRFS_TREE_RELOC_OBJECTID && - ri->objectid == btrfs_header_owner(buf)) { - /* - * Ok we got to this block from it's original owner and - * we have FULL_BACKREF set. Relocation can leave - * converted blocks over so this is altogether possible, - * however it's not possible if the generation > the - * last snapshot, so check for this case. - */ - if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) && - btrfs_header_generation(buf) > ri->last_snapshot) { - flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF; - rec->bad_full_backref = 1; - } - } - } else { - if (ri != NULL && - (ri->objectid == BTRFS_TREE_RELOC_OBJECTID || - btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) { - flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; - rec->bad_full_backref = 1; - } + return ret; +} + +static int check_space_cache(struct btrfs_root *root) +{ + struct btrfs_block_group_cache *cache; + u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE; + int ret; + int error = 0; + + if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL && + btrfs_super_generation(root->fs_info->super_copy) != + btrfs_super_cache_generation(root->fs_info->super_copy)) { + printf("cache and super generation don't match, space cache " + "will be invalidated\n"); + return 0; } - if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { - rec->flag_block_full_backref = 1; - parent = bytenr; - owner = 0; - } else { - rec->flag_block_full_backref = 0; - parent = 0; - owner = btrfs_header_owner(buf); + if (ctx.progress_enabled) { + ctx.tp = TASK_FREE_SPACE; + task_start(ctx.info); } - ret = check_block(root, extent_cache, buf, flags); - if (ret) - goto out; + while (1) { + cache = btrfs_lookup_first_block_group(root->fs_info, start); + if (!cache) + break; - if (btrfs_is_leaf(buf)) { - 
btree_space_waste += btrfs_leaf_free_space(root, buf); - for (i = 0; i < nritems; i++) { - struct btrfs_file_extent_item *fi; - btrfs_item_key_to_cpu(buf, &key, i); - if (key.type == BTRFS_EXTENT_ITEM_KEY) { - process_extent_item(root, extent_cache, buf, - i); - continue; - } - if (key.type == BTRFS_METADATA_ITEM_KEY) { - process_extent_item(root, extent_cache, buf, - i); - continue; - } - if (key.type == BTRFS_EXTENT_CSUM_KEY) { - total_csum_bytes += - btrfs_item_size_nr(buf, i); - continue; - } - if (key.type == BTRFS_CHUNK_ITEM_KEY) { - process_chunk_item(chunk_cache, &key, buf, i); - continue; + start = cache->key.objectid + cache->key.offset; + if (!cache->free_space_ctl) { + if (btrfs_init_free_space_ctl(cache, + root->sectorsize)) { + ret = -ENOMEM; + break; } - if (key.type == BTRFS_DEV_ITEM_KEY) { - process_device_item(dev_cache, &key, buf, i); + } else { + btrfs_remove_free_space_cache(cache); + } + + if (btrfs_fs_compat_ro(root->fs_info, + BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) { + ret = exclude_super_stripes(root, cache); + if (ret) { + fprintf(stderr, "could not exclude super stripes: %s\n", + strerror(-ret)); + error++; continue; } - if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { - process_block_group_item(block_group_cache, - &key, buf, i); + ret = load_free_space_tree(root->fs_info, cache); + free_excluded_extents(root, cache); + if (ret < 0) { + fprintf(stderr, "could not load free space tree: %s\n", + strerror(-ret)); + error++; continue; } - if (key.type == BTRFS_DEV_EXTENT_KEY) { - process_device_extent_item(dev_extent_cache, - &key, buf, i); + error += ret; + } else { + ret = load_free_space_cache(root->fs_info, cache); + if (!ret) continue; + } - } - if (key.type == BTRFS_EXTENT_REF_V0_KEY) { -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 - process_extent_ref_v0(extent_cache, buf, i); -#else - BUG(); -#endif - continue; - } + ret = verify_space_cache(root, cache); + if (ret) { + fprintf(stderr, "cache appears valid but isn't %Lu\n", + 
cache->key.objectid); + error++; + } + } - if (key.type == BTRFS_TREE_BLOCK_REF_KEY) { - add_tree_backref(extent_cache, key.objectid, 0, - key.offset, 0); - continue; - } - if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) { - add_tree_backref(extent_cache, key.objectid, - key.offset, 0, 0); - continue; - } - if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { - struct btrfs_extent_data_ref *ref; - ref = btrfs_item_ptr(buf, i, - struct btrfs_extent_data_ref); - add_data_backref(extent_cache, - key.objectid, 0, - btrfs_extent_data_ref_root(buf, ref), - btrfs_extent_data_ref_objectid(buf, - ref), - btrfs_extent_data_ref_offset(buf, ref), - btrfs_extent_data_ref_count(buf, ref), - 0, root->sectorsize); - continue; - } - if (key.type == BTRFS_SHARED_DATA_REF_KEY) { - struct btrfs_shared_data_ref *ref; - ref = btrfs_item_ptr(buf, i, - struct btrfs_shared_data_ref); - add_data_backref(extent_cache, - key.objectid, key.offset, 0, 0, 0, - btrfs_shared_data_ref_count(buf, ref), - 0, root->sectorsize); - continue; - } - if (key.type == BTRFS_ORPHAN_ITEM_KEY) { - struct bad_item *bad; + task_stop(ctx.info); - if (key.objectid == BTRFS_ORPHAN_OBJECTID) - continue; - if (!owner) - continue; - bad = malloc(sizeof(struct bad_item)); - if (!bad) - continue; - INIT_LIST_HEAD(&bad->list); - memcpy(&bad->key, &key, - sizeof(struct btrfs_key)); - bad->root_id = owner; - list_add_tail(&bad->list, &delete_items); - continue; - } - if (key.type != BTRFS_EXTENT_DATA_KEY) - continue; - fi = btrfs_item_ptr(buf, i, - struct btrfs_file_extent_item); - if (btrfs_file_extent_type(buf, fi) == - BTRFS_FILE_EXTENT_INLINE) - continue; - if (btrfs_file_extent_disk_bytenr(buf, fi) == 0) - continue; - - data_bytes_allocated += - btrfs_file_extent_disk_num_bytes(buf, fi); - if (data_bytes_allocated < root->sectorsize) { - abort(); - } - data_bytes_referenced += - btrfs_file_extent_num_bytes(buf, fi); - add_data_backref(extent_cache, - btrfs_file_extent_disk_bytenr(buf, fi), - parent, owner, key.objectid, key.offset 
- - btrfs_file_extent_offset(buf, fi), 1, 1, - btrfs_file_extent_disk_num_bytes(buf, fi)); - } - } else { - int level; - struct btrfs_key first_key; - - first_key.objectid = 0; - - if (nritems > 0) - btrfs_item_key_to_cpu(buf, &first_key, 0); - level = btrfs_header_level(buf); - for (i = 0; i < nritems; i++) { - ptr = btrfs_node_blockptr(buf, i); - size = root->nodesize; - btrfs_node_key_to_cpu(buf, &key, i); - if (ri != NULL) { - if ((level == ri->drop_level) - && is_dropped_key(&key, &ri->drop_key)) { - continue; - } - } - ret = add_extent_rec(extent_cache, &key, - btrfs_node_ptr_generation(buf, i), - ptr, size, 0, 0, 1, 0, 1, 0, - size); - BUG_ON(ret); - - add_tree_backref(extent_cache, ptr, parent, owner, 1); - - if (level > 1) { - add_pending(nodes, seen, ptr, size); - } else { - add_pending(pending, seen, ptr, size); - } - } - btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - - nritems) * sizeof(struct btrfs_key_ptr); - } - total_btree_bytes += buf->len; - if (fs_root_objectid(btrfs_header_owner(buf))) - total_fs_tree_bytes += buf->len; - if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) - total_extent_tree_bytes += buf->len; - if (!found_old_backref && - btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID && - btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV && - !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) - found_old_backref = 1; -out: - free_extent_buffer(buf); - return ret; + return error ? 
-EINVAL : 0; } -static int add_root_to_pending(struct extent_buffer *buf, - struct cache_tree *extent_cache, - struct cache_tree *pending, - struct cache_tree *seen, - struct cache_tree *nodes, - u64 objectid) -{ - if (btrfs_header_level(buf) > 0) - add_pending(nodes, seen, buf->start, buf->len); - else - add_pending(pending, seen, buf->start, buf->len); - add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len, - 0, 1, 1, 0, 1, 0, buf->len); +static int check_extent_csums(struct btrfs_root *root, u64 bytenr, + u64 num_bytes, unsigned long leaf_offset, + struct extent_buffer *eb) { - if (objectid == BTRFS_TREE_RELOC_OBJECTID || - btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) - add_tree_backref(extent_cache, buf->start, buf->start, - 0, 1); - else - add_tree_backref(extent_cache, buf->start, 0, objectid, 1); - return 0; -} + u64 offset = 0; + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); + char *data; + unsigned long csum_offset; + u32 csum; + u32 csum_expected; + u64 read_len; + u64 data_checked = 0; + u64 tmp; + int ret = 0; + int mirror; + int num_copies; -/* as we fix the tree, we might be deleting blocks that - * we're tracking for repair. This hook makes sure we - * remove any backrefs for blocks as we are fixing them. 
- */ -static int free_extent_hook(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 bytenr, u64 num_bytes, u64 parent, - u64 root_objectid, u64 owner, u64 offset, - int refs_to_drop) -{ - struct extent_record *rec; - struct cache_extent *cache; - int is_data; - struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache; + if (num_bytes % root->sectorsize) + return -EINVAL; - is_data = owner >= BTRFS_FIRST_FREE_OBJECTID; - cache = lookup_cache_extent(extent_cache, bytenr, num_bytes); - if (!cache) - return 0; + data = malloc(num_bytes); + if (!data) + return -ENOMEM; - rec = container_of(cache, struct extent_record, cache); - if (is_data) { - struct data_backref *back; - back = find_data_backref(rec, parent, root_objectid, owner, - offset, 1, bytenr, num_bytes); - if (!back) + while (offset < num_bytes) { + mirror = 0; +again: + read_len = num_bytes - offset; + /* read as much space once a time */ + ret = read_extent_data(root, data + offset, + bytenr + offset, &read_len, mirror); + if (ret) goto out; - if (back->node.found_ref) { - back->found_ref -= refs_to_drop; - if (rec->refs) - rec->refs -= refs_to_drop; - } - if (back->node.found_extent_tree) { - back->num_refs -= refs_to_drop; - if (rec->extent_item_refs) - rec->extent_item_refs -= refs_to_drop; - } - if (back->found_ref == 0) - back->node.found_ref = 0; - if (back->num_refs == 0) - back->node.found_extent_tree = 0; + data_checked = 0; + /* verify every 4k data's checksum */ + while (data_checked < read_len) { + csum = ~(u32)0; + tmp = offset + data_checked; - if (!back->node.found_extent_tree && back->node.found_ref) { - list_del(&back->node.list); - free(back); - } - } else { - struct tree_backref *back; - back = find_tree_backref(rec, parent, root_objectid); - if (!back) - goto out; - if (back->node.found_ref) { - if (rec->refs) - rec->refs--; - back->node.found_ref = 0; - } - if (back->node.found_extent_tree) { - if (rec->extent_item_refs) - rec->extent_item_refs--; - 
back->node.found_extent_tree = 0; - } - if (!back->node.found_extent_tree && back->node.found_ref) { - list_del(&back->node.list); - free(back); + csum = btrfs_csum_data(NULL, (char *)data + tmp, + csum, root->sectorsize); + btrfs_csum_final(csum, (u8 *)&csum); + + csum_offset = leaf_offset + + tmp / root->sectorsize * csum_size; + read_extent_buffer(eb, (char *)&csum_expected, + csum_offset, csum_size); + /* try another mirror */ + if (csum != csum_expected) { + fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n", + mirror, bytenr + tmp, + csum, csum_expected); + num_copies = btrfs_num_copies( + &root->fs_info->mapping_tree, + bytenr, num_bytes); + if (mirror < num_copies - 1) { + mirror += 1; + goto again; + } + } + data_checked += root->sectorsize; } + offset += read_len; } - maybe_free_extent_rec(extent_cache, rec); out: - return 0; + free(data); + return ret; } -static int delete_extent_records(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - u64 bytenr, u64 new_len) +static int check_extent_exists(struct btrfs_root *root, u64 bytenr, + u64 num_bytes) { - struct btrfs_key key; - struct btrfs_key found_key; + struct btrfs_path path; struct extent_buffer *leaf; + struct btrfs_key key; int ret; - int slot; - + btrfs_init_path(&path); key.objectid = bytenr; - key.type = (u8)-1; + key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = (u64)-1; - while(1) { - ret = btrfs_search_slot(trans, root->fs_info->extent_root, - &key, path, 0, 1); - if (ret < 0) - break; - - if (ret > 0) { - ret = 0; - if (path->slots[0] == 0) - break; - path->slots[0]--; +again: + ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path, + 0, 0); + if (ret < 0) { + fprintf(stderr, "Error looking up extent record %d\n", ret); + btrfs_release_path(&path); + return ret; + } else if (ret) { + if (path.slots[0] > 0) { + path.slots[0]--; + } else { + ret = btrfs_prev_leaf(root, &path); + if (ret < 0) { + goto out; + } else if (ret > 0) { 
+ ret = 0; + goto out; + } } - ret = 0; - - leaf = path->nodes[0]; - slot = path->slots[0]; + } - btrfs_item_key_to_cpu(leaf, &found_key, slot); - if (found_key.objectid != bytenr) - break; + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); - if (found_key.type != BTRFS_EXTENT_ITEM_KEY && - found_key.type != BTRFS_METADATA_ITEM_KEY && - found_key.type != BTRFS_TREE_BLOCK_REF_KEY && - found_key.type != BTRFS_EXTENT_DATA_REF_KEY && - found_key.type != BTRFS_EXTENT_REF_V0_KEY && - found_key.type != BTRFS_SHARED_BLOCK_REF_KEY && - found_key.type != BTRFS_SHARED_DATA_REF_KEY) { - btrfs_release_path(path); - if (found_key.type == 0) { - if (found_key.offset == 0) - break; - key.offset = found_key.offset - 1; - key.type = found_key.type; + /* + * Block group items come before extent items if they have the same + * bytenr, so walk back one more just in case. Dear future traveller, + * first congrats on mastering time travel. Now if it's not too much + * trouble could you go back to 2006 and tell Chris to make the + * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the + * EXTENT_ITEM_KEY please? 
+ */ + while (key.type > BTRFS_EXTENT_ITEM_KEY) { + if (path.slots[0] > 0) { + path.slots[0]--; + } else { + ret = btrfs_prev_leaf(root, &path); + if (ret < 0) { + goto out; + } else if (ret > 0) { + ret = 0; + goto out; } - key.type = found_key.type - 1; - key.offset = (u64)-1; - continue; } + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + } - fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n", - found_key.objectid, found_key.type, found_key.offset); - - ret = btrfs_del_item(trans, root->fs_info->extent_root, path); - if (ret) + while (num_bytes) { + if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) { + ret = btrfs_next_leaf(root, &path); + if (ret < 0) { + fprintf(stderr, "Error going to next leaf " + "%d\n", ret); + btrfs_release_path(&path); + return ret; + } else if (ret) { + break; + } + } + leaf = path.nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); + if (key.type != BTRFS_EXTENT_ITEM_KEY) { + path.slots[0]++; + continue; + } + if (key.objectid + key.offset < bytenr) { + path.slots[0]++; + continue; + } + if (key.objectid > bytenr + num_bytes) break; - btrfs_release_path(path); - if (found_key.type == BTRFS_EXTENT_ITEM_KEY || - found_key.type == BTRFS_METADATA_ITEM_KEY) { - u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ? 
- found_key.offset : root->nodesize; - - ret = btrfs_update_block_group(trans, root, bytenr, - bytes, 0, 0); - if (ret) + if (key.objectid == bytenr) { + if (key.offset >= num_bytes) { + num_bytes = 0; + break; + } + num_bytes -= key.offset; + bytenr += key.offset; + } else if (key.objectid < bytenr) { + if (key.objectid + key.offset >= bytenr + num_bytes) { + num_bytes = 0; break; + } + num_bytes = (bytenr + num_bytes) - + (key.objectid + key.offset); + bytenr = key.objectid + key.offset; + } else { + if (key.objectid + key.offset < bytenr + num_bytes) { + u64 new_start = key.objectid + key.offset; + u64 new_bytes = bytenr + num_bytes - new_start; + + /* + * Weird case, the extent is in the middle of + * our range, we'll have to search one side + * and then the other. Not sure if this happens + * in real life, but no harm in coding it up + * anyway just in case. + */ + btrfs_release_path(&path); + ret = check_extent_exists(root, new_start, + new_bytes); + if (ret) { + fprintf(stderr, "Right section didn't " + "have a record\n"); + break; + } + num_bytes = key.objectid - bytenr; + goto again; + } + num_bytes = key.objectid - bytenr; } + path.slots[0]++; + } + ret = 0; + +out: + if (num_bytes && !ret) { + fprintf(stderr, "There are no extents for csum range " + "%Lu-%Lu\n", bytenr, bytenr+num_bytes); + ret = 1; } - btrfs_release_path(path); + btrfs_release_path(&path); return ret; } -/* - * for a single backref, this will allocate a new extent - * and add the backref to it. 
- */ -static int record_extent(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info, - struct btrfs_path *path, - struct extent_record *rec, - struct extent_backref *back, - int allocated, u64 flags) +static int check_csums(struct btrfs_root *root) { - int ret; - struct btrfs_root *extent_root = info->extent_root; + struct btrfs_path path; struct extent_buffer *leaf; - struct btrfs_key ins_key; - struct btrfs_extent_item *ei; - struct tree_backref *tback; - struct data_backref *dback; - struct btrfs_tree_block_info *bi; - - if (!back->is_data) - rec->max_size = max_t(u64, rec->max_size, - info->extent_root->nodesize); - - if (!allocated) { - u32 item_size = sizeof(*ei); - - if (!back->is_data) - item_size += sizeof(*bi); - - ins_key.objectid = rec->start; - ins_key.offset = rec->max_size; - ins_key.type = BTRFS_EXTENT_ITEM_KEY; - - ret = btrfs_insert_empty_item(trans, extent_root, path, - &ins_key, item_size); - if (ret) - goto fail; - - leaf = path->nodes[0]; - ei = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_extent_item); - - btrfs_set_extent_refs(leaf, ei, 0); - btrfs_set_extent_generation(leaf, ei, rec->generation); + struct btrfs_key key; + u64 offset = 0, num_bytes = 0; + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); + int errors = 0; + int ret; + u64 data_len; + unsigned long leaf_offset; - if (back->is_data) { - btrfs_set_extent_flags(leaf, ei, - BTRFS_EXTENT_FLAG_DATA); - } else { - struct btrfs_disk_key copy_key;; + root = root->fs_info->csum_root; + if (!extent_buffer_uptodate(root->node)) { + fprintf(stderr, "No valid csum tree found\n"); + return -ENOENT; + } - tback = (struct tree_backref *)back; - bi = (struct btrfs_tree_block_info *)(ei + 1); - memset_extent_buffer(leaf, 0, (unsigned long)bi, - sizeof(*bi)); + btrfs_init_path(&path); + key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; + key.type = BTRFS_EXTENT_CSUM_KEY; + key.offset = 0; + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret < 0) { + 
fprintf(stderr, "Error searching csum tree %d\n", ret); + btrfs_release_path(&path); + return ret; + } - btrfs_set_disk_key_objectid(©_key, - rec->info_objectid); - btrfs_set_disk_key_type(©_key, 0); - btrfs_set_disk_key_offset(©_key, 0); + if (ret > 0 && path.slots[0]) + path.slots[0]--; + ret = 0; - btrfs_set_tree_block_level(leaf, bi, rec->info_level); - btrfs_set_tree_block_key(leaf, bi, ©_key); + while (1) { + if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) { + ret = btrfs_next_leaf(root, &path); + if (ret < 0) { + fprintf(stderr, "Error going to next leaf " + "%d\n", ret); + break; + } + if (ret) + break; + } + leaf = path.nodes[0]; - btrfs_set_extent_flags(leaf, ei, - BTRFS_EXTENT_FLAG_TREE_BLOCK | flags); + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); + if (key.type != BTRFS_EXTENT_CSUM_KEY) { + path.slots[0]++; + continue; } - btrfs_mark_buffer_dirty(leaf); - ret = btrfs_update_block_group(trans, extent_root, rec->start, - rec->max_size, 1, 0); + data_len = (btrfs_item_size_nr(leaf, path.slots[0]) / + csum_size) * root->sectorsize; + if (!check_data_csum) + goto skip_csum_check; + leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]); + ret = check_extent_csums(root, key.offset, data_len, + leaf_offset, leaf); if (ret) - goto fail; - btrfs_release_path(path); + break; +skip_csum_check: + if (!num_bytes) { + offset = key.offset; + } else if (key.offset != offset + num_bytes) { + ret = check_extent_exists(root, offset, num_bytes); + if (ret) { + fprintf(stderr, "Csum exists for %Lu-%Lu but " + "there is no extent record\n", + offset, offset+num_bytes); + errors++; + } + offset = key.offset; + num_bytes = 0; + } + num_bytes += data_len; + path.slots[0]++; } - if (back->is_data) { - u64 parent; - int i; + btrfs_release_path(&path); + return errors; +} - dback = (struct data_backref *)back; - if (back->full_backref) - parent = dback->parent; - else - parent = 0; +static int is_dropped_key(struct btrfs_key *key, + struct btrfs_key *drop_key) { 
+ if (key->objectid < drop_key->objectid) + return 1; + else if (key->objectid == drop_key->objectid) { + if (key->type < drop_key->type) + return 1; + else if (key->type == drop_key->type) { + if (key->offset < drop_key->offset) + return 1; + } + } + return 0; +} - for (i = 0; i < dback->found_ref; i++) { - /* if parent != 0, we're doing a full backref - * passing BTRFS_FIRST_FREE_OBJECTID as the owner - * just makes the backref allocator create a data - * backref - */ - ret = btrfs_inc_extent_ref(trans, info->extent_root, - rec->start, rec->max_size, - parent, - dback->root, - parent ? - BTRFS_FIRST_FREE_OBJECTID : - dback->owner, - dback->offset); - if (ret) - break; - } - fprintf(stderr, "adding new data backref" - " on %llu %s %llu owner %llu" - " offset %llu found %d\n", - (unsigned long long)rec->start, - back->full_backref ? - "parent" : "root", - back->full_backref ? - (unsigned long long)parent : - (unsigned long long)dback->root, - (unsigned long long)dback->owner, - (unsigned long long)dback->offset, - dback->found_ref); - } else { - u64 parent; - - tback = (struct tree_backref *)back; - if (back->full_backref) - parent = tback->parent; - else - parent = 0; - - ret = btrfs_inc_extent_ref(trans, info->extent_root, - rec->start, rec->max_size, - parent, tback->root, 0, 0); - fprintf(stderr, "adding new tree backref on " - "start %llu len %llu parent %llu root %llu\n", - rec->start, rec->max_size, parent, tback->root); - } -fail: - btrfs_release_path(path); - return ret; -} - -static struct extent_entry *find_entry(struct list_head *entries, - u64 bytenr, u64 bytes) +/* + * Here are the rules for FULL_BACKREF. + * + * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set. + * 2) If btrfs_header_owner(buf) no longer points to buf then we have + * FULL_BACKREF set. + * 3) We cowed the block walking down a reloc tree. 
This is impossible to tell + * if it happened after the relocation occurred since we'll have dropped the + * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and + * have no real way to know for sure. + * + * We process the blocks one root at a time, and we start from the lowest root + * objectid and go to the highest. So we can just lookup the owner backref for + * the record and if we don't find it then we know it doesn't exist and we have + * a FULL BACKREF. + * + * FIXME: if we ever start reclaiming root objectid's then we need to fix this + * assumption and simply indicate that we _think_ that the FULL BACKREF needs to + * be set or not and then we can check later once we've gathered all the refs. + */ +static int calc_extent_flag(struct btrfs_root *root, + struct cache_tree *extent_cache, + struct extent_buffer *buf, + struct root_item_record *ri, + u64 *flags) { - struct extent_entry *entry = NULL; + struct extent_record *rec; + struct cache_extent *cache; + struct tree_backref *tback; + u64 owner = 0; - list_for_each_entry(entry, entries, list) { - if (entry->bytenr == bytenr && entry->bytes == bytes) - return entry; - } + cache = lookup_cache_extent(extent_cache, buf->start, 1); + /* we have added this extent before */ + if (!cache) + return -ENOENT; - return NULL; -} + rec = container_of(cache, struct extent_record, cache); -static struct extent_entry *find_most_right_entry(struct list_head *entries) -{ - struct extent_entry *entry, *best = NULL, *prev = NULL; + /* + * Except file/reloc tree, we can not have + * FULL BACKREF MODE + */ + if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID) + goto normal; + /* + * root node + */ + if (buf->start == ri->bytenr) + goto normal; - list_for_each_entry(entry, entries, list) { - if (!prev) { - prev = entry; - continue; - } + if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) + goto full_backref; - /* - * If there are as many broken entries as entries then we know - * not to trust this particular 
entry. - */ - if (entry->broken == entry->count) - continue; + owner = btrfs_header_owner(buf); + if (owner == ri->objectid) + goto normal; - /* - * If our current entry == best then we can't be sure our best - * is really the best, so we need to keep searching. - */ - if (best && best->count == entry->count) { - prev = entry; - best = NULL; - continue; - } + tback = find_tree_backref(rec, 0, owner); + if (!tback) + goto full_backref; +normal: + *flags = 0; + if (rec->flag_block_full_backref != FLAG_UNSET && + rec->flag_block_full_backref != 0) + rec->bad_full_backref = 1; + return 0; +full_backref: + *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + if (rec->flag_block_full_backref != FLAG_UNSET && + rec->flag_block_full_backref != 1) + rec->bad_full_backref = 1; + return 0; +} - /* Prev == entry, not good enough, have to keep searching */ - if (!prev->broken && prev->count == entry->count) - continue; +static void report_mismatch_key_root(u8 key_type, u64 rootid) +{ + fprintf(stderr, "Invalid key type("); + print_key_type(stderr, 0, key_type); + fprintf(stderr, ") found in root("); + print_objectid(stderr, rootid, 0); + fprintf(stderr, ")\n"); +} - if (!best) - best = (prev->count > entry->count) ? prev : entry; - else if (best->count < entry->count) - best = entry; - prev = entry; +/* + * Check if the key is valid with its extent buffer. 
+ * + * This is a early check in case invalid key exists in a extent buffer + * This is not comprehensive yet, but should prevent wrong key/item passed + * further + */ +static int check_type_with_root(u64 rootid, u8 key_type) +{ + switch (key_type) { + /* Only valid in chunk tree */ + case BTRFS_DEV_ITEM_KEY: + case BTRFS_CHUNK_ITEM_KEY: + if (rootid != BTRFS_CHUNK_TREE_OBJECTID) + goto err; + break; + /* valid in csum and log tree */ + case BTRFS_CSUM_TREE_OBJECTID: + if (!(rootid == BTRFS_TREE_LOG_OBJECTID || + is_fstree(rootid))) + goto err; + break; + case BTRFS_EXTENT_ITEM_KEY: + case BTRFS_METADATA_ITEM_KEY: + case BTRFS_BLOCK_GROUP_ITEM_KEY: + if (rootid != BTRFS_EXTENT_TREE_OBJECTID) + goto err; + break; + case BTRFS_ROOT_ITEM_KEY: + if (rootid != BTRFS_ROOT_TREE_OBJECTID) + goto err; + break; + case BTRFS_DEV_EXTENT_KEY: + if (rootid != BTRFS_DEV_TREE_OBJECTID) + goto err; + break; } - - return best; + return 0; +err: + report_mismatch_key_root(key_type, rootid); + return -EINVAL; } -static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path, - struct data_backref *dback, struct extent_entry *entry) +static int run_next_block(struct btrfs_root *root, + struct block_info *bits, + int bits_nr, + u64 *last, + struct cache_tree *pending, + struct cache_tree *seen, + struct cache_tree *reada, + struct cache_tree *nodes, + struct cache_tree *extent_cache, + struct cache_tree *chunk_cache, + struct rb_root *dev_cache, + struct block_group_tree *block_group_cache, + struct device_extent_tree *dev_extent_cache, + struct root_item_record *ri) { - struct btrfs_trans_handle *trans; - struct btrfs_root *root; - struct btrfs_file_extent_item *fi; - struct extent_buffer *leaf; + struct extent_buffer *buf; + struct extent_record *rec = NULL; + u64 bytenr; + u32 size; + u64 parent; + u64 owner; + u64 flags; + u64 ptr; + u64 gen = 0; + int ret = 0; + int i; + int nritems; struct btrfs_key key; - u64 bytenr, bytes; - int ret, err; + struct cache_extent 
*cache; + int reada_bits; - key.objectid = dback->root; - key.type = BTRFS_ROOT_ITEM_KEY; - key.offset = (u64)-1; - root = btrfs_read_fs_root(info, &key); - if (IS_ERR(root)) { - fprintf(stderr, "Couldn't find root for our ref\n"); - return -EINVAL; - } + nritems = pick_next_pending(pending, reada, nodes, *last, bits, + bits_nr, &reada_bits); + if (nritems == 0) + return 1; - /* - * The backref points to the original offset of the extent if it was - * split, so we need to search down to the offset we have and then walk - * forward until we find the backref we're looking for. - */ - key.objectid = dback->owner; - key.type = BTRFS_EXTENT_DATA_KEY; - key.offset = dback->offset; - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) { - fprintf(stderr, "Error looking up ref %d\n", ret); - return ret; - } + if (!reada_bits) { + for(i = 0; i < nritems; i++) { + ret = add_cache_extent(reada, bits[i].start, + bits[i].size); + if (ret == -EEXIST) + continue; - while (1) { - if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { - ret = btrfs_next_leaf(root, path); - if (ret) { - fprintf(stderr, "Couldn't find our ref, next\n"); - return -EINVAL; - } - } - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - if (key.objectid != dback->owner || - key.type != BTRFS_EXTENT_DATA_KEY) { - fprintf(stderr, "Couldn't find our ref, search\n"); - return -EINVAL; + /* fixme, get the parent transid */ + readahead_tree_block(root, bits[i].start, + bits[i].size, 0); } - fi = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); - bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); - - if (bytenr == dback->disk_bytenr && bytes == dback->bytes) - break; - path->slots[0]++; } + *last = bits[0].start; + bytenr = bits[0].start; + size = bits[0].size; - btrfs_release_path(path); - - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) - return PTR_ERR(trans); - - /* - 
* Ok we have the key of the file extent we want to fix, now we can cow - * down to the thing and fix it. - */ - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); - if (ret < 0) { - fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n", - key.objectid, key.type, key.offset, ret); - goto out; + cache = lookup_cache_extent(pending, bytenr, size); + if (cache) { + remove_cache_extent(pending, cache); + free(cache); } - if (ret > 0) { - fprintf(stderr, "Well that's odd, we just found this key " - "[%Lu, %u, %Lu]\n", key.objectid, key.type, - key.offset); - ret = -EINVAL; - goto out; + cache = lookup_cache_extent(reada, bytenr, size); + if (cache) { + remove_cache_extent(reada, cache); + free(cache); + } + cache = lookup_cache_extent(nodes, bytenr, size); + if (cache) { + remove_cache_extent(nodes, cache); + free(cache); + } + cache = lookup_cache_extent(extent_cache, bytenr, size); + if (cache) { + rec = container_of(cache, struct extent_record, cache); + gen = rec->parent_generation; } - leaf = path->nodes[0]; - fi = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - if (btrfs_file_extent_compression(leaf, fi) && - dback->disk_bytenr != entry->bytenr) { - fprintf(stderr, "Ref doesn't match the record start and is " - "compressed, please take a btrfs-image of this file " - "system and send it to a btrfs developer so they can " - "complete this functionality for bytenr %Lu\n", - dback->disk_bytenr); - ret = -EINVAL; + /* fixme, get the real parent transid */ + buf = read_tree_block(root, bytenr, size, gen); + if (!extent_buffer_uptodate(buf)) { + record_bad_block_io(root->fs_info, + extent_cache, bytenr, size); goto out; } - if (dback->node.broken && dback->disk_bytenr != entry->bytenr) { - btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr); - } else if (dback->disk_bytenr > entry->bytenr) { - u64 off_diff, offset; + nritems = btrfs_header_nritems(buf); - off_diff = dback->disk_bytenr - entry->bytenr; - offset = 
btrfs_file_extent_offset(leaf, fi); - if (dback->disk_bytenr + offset + - btrfs_file_extent_num_bytes(leaf, fi) > - entry->bytenr + entry->bytes) { - fprintf(stderr, "Ref is past the entry end, please " - "take a btrfs-image of this file system and " - "send it to a btrfs developer, ref %Lu\n", - dback->disk_bytenr); - ret = -EINVAL; - goto out; + flags = 0; + if (!init_extent_tree) { + ret = btrfs_lookup_extent_info(NULL, root, bytenr, + btrfs_header_level(buf), 1, NULL, + &flags); + if (ret < 0) { + ret = calc_extent_flag(root, extent_cache, buf, ri, &flags); + if (ret < 0) { + fprintf(stderr, "Couldn't calc extent flags\n"); + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + } } - offset += off_diff; - btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr); - btrfs_set_file_extent_offset(leaf, fi, offset); - } else if (dback->disk_bytenr < entry->bytenr) { - u64 offset; - - offset = btrfs_file_extent_offset(leaf, fi); - if (dback->disk_bytenr + offset < entry->bytenr) { - fprintf(stderr, "Ref is before the entry start, please" - " take a btrfs-image of this file system and " - "send it to a btrfs developer, ref %Lu\n", - dback->disk_bytenr); - ret = -EINVAL; - goto out; + } else { + flags = 0; + ret = calc_extent_flag(root, extent_cache, buf, ri, &flags); + if (ret < 0) { + fprintf(stderr, "Couldn't calc extent flags\n"); + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; } - - offset += dback->disk_bytenr; - offset -= entry->bytenr; - btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr); - btrfs_set_file_extent_offset(leaf, fi, offset); } - btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes); - - /* - * Chances are if disk_num_bytes were wrong then so is ram_bytes, but - * only do this if we aren't using compression, otherwise it's a - * trickier case. 
- */ - if (!btrfs_file_extent_compression(leaf, fi)) - btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes); - else - printf("ram bytes may be wrong?\n"); - btrfs_mark_buffer_dirty(leaf); -out: - err = btrfs_commit_transaction(trans, root); - btrfs_release_path(path); - return ret ? ret : err; -} - -static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path, - struct extent_record *rec) -{ - struct extent_backref *back; - struct data_backref *dback; - struct extent_entry *entry, *best = NULL; - LIST_HEAD(entries); - int nr_entries = 0; - int broken_entries = 0; - int ret = 0; - short mismatch = 0; - - /* - * Metadata is easy and the backrefs should always agree on bytenr and - * size, if not we've got bigger issues. - */ - if (rec->metadata) - return 0; - - list_for_each_entry(back, &rec->backrefs, list) { - if (back->full_backref || !back->is_data) - continue; - - dback = (struct data_backref *)back; - - /* - * We only pay attention to backrefs that we found a real - * backref for. - */ - if (dback->found_ref == 0) - continue; - - /* - * For now we only catch when the bytes don't match, not the - * bytenr. We can easily do this at the same time, but I want - * to have a fs image to test on before we just add repair - * functionality willy-nilly so we know we won't screw up the - * repair. - */ - - entry = find_entry(&entries, dback->disk_bytenr, - dback->bytes); - if (!entry) { - entry = malloc(sizeof(struct extent_entry)); - if (!entry) { - ret = -ENOMEM; - goto out; + if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { + if (ri != NULL && + ri->objectid != BTRFS_TREE_RELOC_OBJECTID && + ri->objectid == btrfs_header_owner(buf)) { + /* + * Ok we got to this block from it's original owner and + * we have FULL_BACKREF set. Relocation can leave + * converted blocks over so this is altogether possible, + * however it's not possible if the generation > the + * last snapshot, so check for this case. 
+ */ + if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) && + btrfs_header_generation(buf) > ri->last_snapshot) { + flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF; + rec->bad_full_backref = 1; } - memset(entry, 0, sizeof(*entry)); - entry->bytenr = dback->disk_bytenr; - entry->bytes = dback->bytes; - list_add_tail(&entry->list, &entries); - nr_entries++; } - - /* - * If we only have on entry we may think the entries agree when - * in reality they don't so we have to do some extra checking. - */ - if (dback->disk_bytenr != rec->start || - dback->bytes != rec->nr || back->broken) - mismatch = 1; - - if (back->broken) { - entry->broken++; - broken_entries++; + } else { + if (ri != NULL && + (ri->objectid == BTRFS_TREE_RELOC_OBJECTID || + btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) { + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + rec->bad_full_backref = 1; } + } - entry->count++; + if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { + rec->flag_block_full_backref = 1; + parent = bytenr; + owner = 0; + } else { + rec->flag_block_full_backref = 0; + parent = 0; + owner = btrfs_header_owner(buf); } - /* Yay all the backrefs agree, carry on good sir */ - if (nr_entries <= 1 && !mismatch) + ret = check_block(root, extent_cache, buf, flags); + if (ret) goto out; - fprintf(stderr, "attempting to repair backref discrepency for bytenr " - "%Lu\n", rec->start); - - /* - * First we want to see if the backrefs can agree amongst themselves who - * is right, so figure out which one of the entries has the highest - * count. - */ - best = find_most_right_entry(&entries); - - /* - * Ok so we may have an even split between what the backrefs think, so - * this is where we use the extent ref to see what it thinks. 
- */ - if (!best) { - entry = find_entry(&entries, rec->start, rec->nr); - if (!entry && (!broken_entries || !rec->found_rec)) { - fprintf(stderr, "Backrefs don't agree with each other " - "and extent record doesn't agree with anybody," - " so we can't fix bytenr %Lu bytes %Lu\n", - rec->start, rec->nr); - ret = -EINVAL; - goto out; - } else if (!entry) { + if (btrfs_is_leaf(buf)) { + btree_space_waste += btrfs_leaf_free_space(root, buf); + for (i = 0; i < nritems; i++) { + struct btrfs_file_extent_item *fi; + btrfs_item_key_to_cpu(buf, &key, i); /* - * Ok our backrefs were broken, we'll assume this is the - * correct value and add an entry for this range. + * Check key type against the leaf owner. + * Could filter quite a lot of early error if + * owner is correct */ - entry = malloc(sizeof(struct extent_entry)); - if (!entry) { - ret = -ENOMEM; - goto out; + if (check_type_with_root(btrfs_header_owner(buf), + key.type)) { + fprintf(stderr, "ignoring invalid key\n"); + continue; } - memset(entry, 0, sizeof(*entry)); - entry->bytenr = rec->start; - entry->bytes = rec->nr; - list_add_tail(&entry->list, &entries); - nr_entries++; - } - entry->count++; - best = find_most_right_entry(&entries); - if (!best) { - fprintf(stderr, "Backrefs and extent record evenly " - "split on who is right, this is going to " - "require user input to fix bytenr %Lu bytes " - "%Lu\n", rec->start, rec->nr); - ret = -EINVAL; - goto out; + if (key.type == BTRFS_EXTENT_ITEM_KEY) { + process_extent_item(root, extent_cache, buf, + i); + continue; + } + if (key.type == BTRFS_METADATA_ITEM_KEY) { + process_extent_item(root, extent_cache, buf, + i); + continue; + } + if (key.type == BTRFS_EXTENT_CSUM_KEY) { + total_csum_bytes += + btrfs_item_size_nr(buf, i); + continue; + } + if (key.type == BTRFS_CHUNK_ITEM_KEY) { + process_chunk_item(chunk_cache, &key, buf, i); + continue; + } + if (key.type == BTRFS_DEV_ITEM_KEY) { + process_device_item(dev_cache, &key, buf, i); + continue; + } + if (key.type 
== BTRFS_BLOCK_GROUP_ITEM_KEY) { + process_block_group_item(block_group_cache, + &key, buf, i); + continue; + } + if (key.type == BTRFS_DEV_EXTENT_KEY) { + process_device_extent_item(dev_extent_cache, + &key, buf, i); + continue; + + } + if (key.type == BTRFS_EXTENT_REF_V0_KEY) { +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + process_extent_ref_v0(extent_cache, buf, i); +#else + BUG(); +#endif + continue; + } + + if (key.type == BTRFS_TREE_BLOCK_REF_KEY) { + ret = add_tree_backref(extent_cache, + key.objectid, 0, key.offset, 0); + if (ret < 0) + error("add_tree_backref failed: %s", + strerror(-ret)); + continue; + } + if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) { + ret = add_tree_backref(extent_cache, + key.objectid, key.offset, 0, 0); + if (ret < 0) + error("add_tree_backref failed: %s", + strerror(-ret)); + continue; + } + if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { + struct btrfs_extent_data_ref *ref; + ref = btrfs_item_ptr(buf, i, + struct btrfs_extent_data_ref); + add_data_backref(extent_cache, + key.objectid, 0, + btrfs_extent_data_ref_root(buf, ref), + btrfs_extent_data_ref_objectid(buf, + ref), + btrfs_extent_data_ref_offset(buf, ref), + btrfs_extent_data_ref_count(buf, ref), + 0, root->sectorsize); + continue; + } + if (key.type == BTRFS_SHARED_DATA_REF_KEY) { + struct btrfs_shared_data_ref *ref; + ref = btrfs_item_ptr(buf, i, + struct btrfs_shared_data_ref); + add_data_backref(extent_cache, + key.objectid, key.offset, 0, 0, 0, + btrfs_shared_data_ref_count(buf, ref), + 0, root->sectorsize); + continue; + } + if (key.type == BTRFS_ORPHAN_ITEM_KEY) { + struct bad_item *bad; + + if (key.objectid == BTRFS_ORPHAN_OBJECTID) + continue; + if (!owner) + continue; + bad = malloc(sizeof(struct bad_item)); + if (!bad) + continue; + INIT_LIST_HEAD(&bad->list); + memcpy(&bad->key, &key, + sizeof(struct btrfs_key)); + bad->root_id = owner; + list_add_tail(&bad->list, &delete_items); + continue; + } + if (key.type != BTRFS_EXTENT_DATA_KEY) + continue; + fi = btrfs_item_ptr(buf, 
i, + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(buf, fi) == + BTRFS_FILE_EXTENT_INLINE) + continue; + if (btrfs_file_extent_disk_bytenr(buf, fi) == 0) + continue; + + data_bytes_allocated += + btrfs_file_extent_disk_num_bytes(buf, fi); + if (data_bytes_allocated < root->sectorsize) { + abort(); + } + data_bytes_referenced += + btrfs_file_extent_num_bytes(buf, fi); + add_data_backref(extent_cache, + btrfs_file_extent_disk_bytenr(buf, fi), + parent, owner, key.objectid, key.offset - + btrfs_file_extent_offset(buf, fi), 1, 1, + btrfs_file_extent_disk_num_bytes(buf, fi)); } - } + } else { + int level; + struct btrfs_key first_key; - /* - * I don't think this can happen currently as we'll abort() if we catch - * this case higher up, but in case somebody removes that we still can't - * deal with it properly here yet, so just bail out of that's the case. - */ - if (best->bytenr != rec->start) { - fprintf(stderr, "Extent start and backref starts don't match, " - "please use btrfs-image on this file system and send " - "it to a btrfs developer so they can make fsck fix " - "this particular case. bytenr is %Lu, bytes is %Lu\n", - rec->start, rec->nr); - ret = -EINVAL; - goto out; - } + first_key.objectid = 0; - /* - * Ok great we all agreed on an extent record, let's go find the real - * references and fix up the ones that don't match. - */ - list_for_each_entry(back, &rec->backrefs, list) { - if (back->full_backref || !back->is_data) - continue; + if (nritems > 0) + btrfs_item_key_to_cpu(buf, &first_key, 0); + level = btrfs_header_level(buf); + for (i = 0; i < nritems; i++) { + struct extent_record tmpl; - dback = (struct data_backref *)back; + ptr = btrfs_node_blockptr(buf, i); + size = root->nodesize; + btrfs_node_key_to_cpu(buf, &key, i); + if (ri != NULL) { + if ((level == ri->drop_level) + && is_dropped_key(&key, &ri->drop_key)) { + continue; + } + } - /* - * Still ignoring backrefs that don't have a real ref attached - * to them. 
- */ - if (dback->found_ref == 0) - continue; + memset(&tmpl, 0, sizeof(tmpl)); + btrfs_cpu_key_to_disk(&tmpl.parent_key, &key); + tmpl.parent_generation = btrfs_node_ptr_generation(buf, i); + tmpl.start = ptr; + tmpl.nr = size; + tmpl.refs = 1; + tmpl.metadata = 1; + tmpl.max_size = size; + ret = add_extent_rec(extent_cache, &tmpl); + if (ret < 0) + goto out; - if (dback->bytes == best->bytes && - dback->disk_bytenr == best->bytenr) - continue; + ret = add_tree_backref(extent_cache, ptr, parent, + owner, 1); + if (ret < 0) { + error("add_tree_backref failed: %s", + strerror(-ret)); + continue; + } - ret = repair_ref(info, path, dback, best); - if (ret) - goto out; + if (level > 1) { + add_pending(nodes, seen, ptr, size); + } else { + add_pending(pending, seen, ptr, size); + } + } + btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - + nritems) * sizeof(struct btrfs_key_ptr); } - - /* - * Ok we messed with the actual refs, which means we need to drop our - * entire cache and go back and rescan. I know this is a huge pain and - * adds a lot of extra work, but it's the only way to be safe. Once all - * the backrefs agree we may not need to do anything to the extent - * record itself. 
- */ - ret = -EAGAIN; + total_btree_bytes += buf->len; + if (fs_root_objectid(btrfs_header_owner(buf))) + total_fs_tree_bytes += buf->len; + if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) + total_extent_tree_bytes += buf->len; + if (!found_old_backref && + btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID && + btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV && + !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) + found_old_backref = 1; out: - while (!list_empty(&entries)) { - entry = list_entry(entries.next, struct extent_entry, list); - list_del_init(&entry->list); - free(entry); - } + free_extent_buffer(buf); return ret; } -static int process_duplicates(struct btrfs_root *root, - struct cache_tree *extent_cache, - struct extent_record *rec) +static int add_root_to_pending(struct extent_buffer *buf, + struct cache_tree *extent_cache, + struct cache_tree *pending, + struct cache_tree *seen, + struct cache_tree *nodes, + u64 objectid) { - struct extent_record *good, *tmp; - struct cache_extent *cache; + struct extent_record tmpl; int ret; - /* - * If we found a extent record for this extent then return, or if we - * have more than one duplicate we are likely going to need to delete - * something. - */ - if (rec->found_rec || rec->num_duplicates > 1) - return 0; + if (btrfs_header_level(buf) > 0) + add_pending(nodes, seen, buf->start, buf->len); + else + add_pending(pending, seen, buf->start, buf->len); - /* Shouldn't happen but just in case */ - BUG_ON(!rec->num_duplicates); + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.start = buf->start; + tmpl.nr = buf->len; + tmpl.is_root = 1; + tmpl.refs = 1; + tmpl.metadata = 1; + tmpl.max_size = buf->len; + add_extent_rec(extent_cache, &tmpl); - /* - * So this happens if we end up with a backref that doesn't match the - * actual extent entry. So either the backref is bad or the extent - * entry is bad. 
Either way we want to have the extent_record actually - * reflect what we found in the extent_tree, so we need to take the - * duplicate out and use that as the extent_record since the only way we - * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY. - */ - remove_cache_extent(extent_cache, &rec->cache); + if (objectid == BTRFS_TREE_RELOC_OBJECTID || + btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) + ret = add_tree_backref(extent_cache, buf->start, buf->start, + 0, 1); + else + ret = add_tree_backref(extent_cache, buf->start, 0, objectid, + 1); + return ret; +} - good = list_entry(rec->dups.next, struct extent_record, list); - list_del_init(&good->list); - INIT_LIST_HEAD(&good->backrefs); - INIT_LIST_HEAD(&good->dups); - good->cache.start = good->start; - good->cache.size = good->nr; - good->content_checked = 0; - good->owner_ref_checked = 0; - good->num_duplicates = 0; - good->refs = rec->refs; - list_splice_init(&rec->backrefs, &good->backrefs); - while (1) { - cache = lookup_cache_extent(extent_cache, good->start, - good->nr); - if (!cache) - break; - tmp = container_of(cache, struct extent_record, cache); +/* as we fix the tree, we might be deleting blocks that + * we're tracking for repair. This hook makes sure we + * remove any backrefs for blocks as we are fixing them. + */ +static int free_extent_hook(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 owner, u64 offset, + int refs_to_drop) +{ + struct extent_record *rec; + struct cache_extent *cache; + int is_data; + struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache; - /* - * If we find another overlapping extent and it's found_rec is - * set then it's a duplicate and we need to try and delete - * something. 
- */ - if (tmp->found_rec || tmp->num_duplicates > 0) { - if (list_empty(&good->list)) - list_add_tail(&good->list, - &duplicate_extents); - good->num_duplicates += tmp->num_duplicates + 1; - list_splice_init(&tmp->dups, &good->dups); - list_del_init(&tmp->list); - list_add_tail(&tmp->list, &good->dups); - remove_cache_extent(extent_cache, &tmp->cache); - continue; + is_data = owner >= BTRFS_FIRST_FREE_OBJECTID; + cache = lookup_cache_extent(extent_cache, bytenr, num_bytes); + if (!cache) + return 0; + + rec = container_of(cache, struct extent_record, cache); + if (is_data) { + struct data_backref *back; + back = find_data_backref(rec, parent, root_objectid, owner, + offset, 1, bytenr, num_bytes); + if (!back) + goto out; + if (back->node.found_ref) { + back->found_ref -= refs_to_drop; + if (rec->refs) + rec->refs -= refs_to_drop; + } + if (back->node.found_extent_tree) { + back->num_refs -= refs_to_drop; + if (rec->extent_item_refs) + rec->extent_item_refs -= refs_to_drop; } + if (back->found_ref == 0) + back->node.found_ref = 0; + if (back->num_refs == 0) + back->node.found_extent_tree = 0; - /* - * Ok we have another non extent item backed extent rec, so lets - * just add it to this extent and carry on like we did above. 
- */ - good->refs += tmp->refs; - list_splice_init(&tmp->backrefs, &good->backrefs); - remove_cache_extent(extent_cache, &tmp->cache); - free(tmp); + if (!back->node.found_extent_tree && back->node.found_ref) { + list_del(&back->node.list); + free(back); + } + } else { + struct tree_backref *back; + back = find_tree_backref(rec, parent, root_objectid); + if (!back) + goto out; + if (back->node.found_ref) { + if (rec->refs) + rec->refs--; + back->node.found_ref = 0; + } + if (back->node.found_extent_tree) { + if (rec->extent_item_refs) + rec->extent_item_refs--; + back->node.found_extent_tree = 0; + } + if (!back->node.found_extent_tree && back->node.found_ref) { + list_del(&back->node.list); + free(back); + } } - ret = insert_cache_extent(extent_cache, &good->cache); - BUG_ON(ret); - free(rec); - return good->num_duplicates ? 0 : 1; + maybe_free_extent_rec(extent_cache, rec); +out: + return 0; } -static int delete_duplicate_records(struct btrfs_root *root, - struct extent_record *rec) +static int delete_extent_records(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 bytenr, u64 new_len) { - struct btrfs_trans_handle *trans; - LIST_HEAD(delete_list); - struct btrfs_path *path; - struct extent_record *tmp, *good, *n; - int nr_del = 0; - int ret = 0, err; struct btrfs_key key; + struct btrfs_key found_key; + struct extent_buffer *leaf; + int ret; + int slot; - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out; - } - good = rec; - /* Find the record that covers all of the duplicates. */ - list_for_each_entry(tmp, &rec->dups, list) { - if (good->start < tmp->start) - continue; - if (good->nr > tmp->nr) - continue; + key.objectid = bytenr; + key.type = (u8)-1; + key.offset = (u64)-1; - if (tmp->start + tmp->nr < good->start + good->nr) { - fprintf(stderr, "Ok we have overlapping extents that " - "aren't completely covered by eachother, this " - "is going to require more careful thought. 
" - "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n", - tmp->start, tmp->nr, good->start, good->nr); - abort(); - } - good = tmp; - } + while(1) { + ret = btrfs_search_slot(trans, root->fs_info->extent_root, + &key, path, 0, 1); + if (ret < 0) + break; - if (good != rec) - list_add_tail(&rec->list, &delete_list); + if (ret > 0) { + ret = 0; + if (path->slots[0] == 0) + break; + path->slots[0]--; + } + ret = 0; - list_for_each_entry_safe(tmp, n, &rec->dups, list) { - if (tmp == good) - continue; - list_move_tail(&tmp->list, &delete_list); - } + leaf = path->nodes[0]; + slot = path->slots[0]; - root = root->fs_info->extent_root; - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto out; - } + btrfs_item_key_to_cpu(leaf, &found_key, slot); + if (found_key.objectid != bytenr) + break; - list_for_each_entry(tmp, &delete_list, list) { - if (tmp->found_rec == 0) + if (found_key.type != BTRFS_EXTENT_ITEM_KEY && + found_key.type != BTRFS_METADATA_ITEM_KEY && + found_key.type != BTRFS_TREE_BLOCK_REF_KEY && + found_key.type != BTRFS_EXTENT_DATA_REF_KEY && + found_key.type != BTRFS_EXTENT_REF_V0_KEY && + found_key.type != BTRFS_SHARED_BLOCK_REF_KEY && + found_key.type != BTRFS_SHARED_DATA_REF_KEY) { + btrfs_release_path(path); + if (found_key.type == 0) { + if (found_key.offset == 0) + break; + key.offset = found_key.offset - 1; + key.type = found_key.type; + } + key.type = found_key.type - 1; + key.offset = (u64)-1; continue; - key.objectid = tmp->start; - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = tmp->nr; - - /* Shouldn't happen but just in case */ - if (tmp->metadata) { - fprintf(stderr, "Well this shouldn't happen, extent " - "record overlaps but is metadata? 
" - "[%Lu, %Lu]\n", tmp->start, tmp->nr); - abort(); } - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret) { - if (ret > 0) - ret = -EINVAL; - break; - } - ret = btrfs_del_item(trans, root, path); + fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n", + found_key.objectid, found_key.type, found_key.offset); + + ret = btrfs_del_item(trans, root->fs_info->extent_root, path); if (ret) break; btrfs_release_path(path); - nr_del++; + + if (found_key.type == BTRFS_EXTENT_ITEM_KEY || + found_key.type == BTRFS_METADATA_ITEM_KEY) { + u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ? + found_key.offset : root->nodesize; + + ret = btrfs_update_block_group(trans, root, bytenr, + bytes, 0, 0); + if (ret) + break; + } } - err = btrfs_commit_transaction(trans, root); - if (err && !ret) - ret = err; -out: - while (!list_empty(&delete_list)) { - tmp = list_entry(delete_list.next, struct extent_record, list); - list_del_init(&tmp->list); - if (tmp == rec) - continue; - free(tmp); + + btrfs_release_path(path); + return ret; +} + +/* + * for a single backref, this will allocate a new extent + * and add the backref to it. 
+ */ +static int record_extent(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *info, + struct btrfs_path *path, + struct extent_record *rec, + struct extent_backref *back, + int allocated, u64 flags) +{ + int ret; + struct btrfs_root *extent_root = info->extent_root; + struct extent_buffer *leaf; + struct btrfs_key ins_key; + struct btrfs_extent_item *ei; + struct data_backref *dback; + struct btrfs_tree_block_info *bi; + + if (!back->is_data) + rec->max_size = max_t(u64, rec->max_size, + info->extent_root->nodesize); + + if (!allocated) { + u32 item_size = sizeof(*ei); + + if (!back->is_data) + item_size += sizeof(*bi); + + ins_key.objectid = rec->start; + ins_key.offset = rec->max_size; + ins_key.type = BTRFS_EXTENT_ITEM_KEY; + + ret = btrfs_insert_empty_item(trans, extent_root, path, + &ins_key, item_size); + if (ret) + goto fail; + + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_item); + + btrfs_set_extent_refs(leaf, ei, 0); + btrfs_set_extent_generation(leaf, ei, rec->generation); + + if (back->is_data) { + btrfs_set_extent_flags(leaf, ei, + BTRFS_EXTENT_FLAG_DATA); + } else { + struct btrfs_disk_key copy_key;; + + bi = (struct btrfs_tree_block_info *)(ei + 1); + memset_extent_buffer(leaf, 0, (unsigned long)bi, + sizeof(*bi)); + + btrfs_set_disk_key_objectid(&copy_key, + rec->info_objectid); + btrfs_set_disk_key_type(&copy_key, 0); + btrfs_set_disk_key_offset(&copy_key, 0); + + btrfs_set_tree_block_level(leaf, bi, rec->info_level); + btrfs_set_tree_block_key(leaf, bi, &copy_key); + + btrfs_set_extent_flags(leaf, ei, + BTRFS_EXTENT_FLAG_TREE_BLOCK | flags); + } + + btrfs_mark_buffer_dirty(leaf); + ret = btrfs_update_block_group(trans, extent_root, rec->start, + rec->max_size, 1, 0); + if (ret) + goto fail; + btrfs_release_path(path); + } + + if (back->is_data) { + u64 parent; + int i; + + dback = to_data_backref(back); + if (back->full_backref) + parent = dback->parent; + else + parent = 0; + + for (i = 0; i < 
dback->found_ref; i++) { + /* if parent != 0, we're doing a full backref + * passing BTRFS_FIRST_FREE_OBJECTID as the owner + * just makes the backref allocator create a data + * backref + */ + ret = btrfs_inc_extent_ref(trans, info->extent_root, + rec->start, rec->max_size, + parent, + dback->root, + parent ? + BTRFS_FIRST_FREE_OBJECTID : + dback->owner, + dback->offset); + if (ret) + break; + } + fprintf(stderr, "adding new data backref" + " on %llu %s %llu owner %llu" + " offset %llu found %d\n", + (unsigned long long)rec->start, + back->full_backref ? + "parent" : "root", + back->full_backref ? + (unsigned long long)parent : + (unsigned long long)dback->root, + (unsigned long long)dback->owner, + (unsigned long long)dback->offset, + dback->found_ref); + } else { + u64 parent; + struct tree_backref *tback; + + tback = to_tree_backref(back); + if (back->full_backref) + parent = tback->parent; + else + parent = 0; + + ret = btrfs_inc_extent_ref(trans, info->extent_root, + rec->start, rec->max_size, + parent, tback->root, 0, 0); + fprintf(stderr, "adding new tree backref on " + "start %llu len %llu parent %llu root %llu\n", + rec->start, rec->max_size, parent, tback->root); + } +fail: + btrfs_release_path(path); + return ret; +} + +static struct extent_entry *find_entry(struct list_head *entries, + u64 bytenr, u64 bytes) +{ + struct extent_entry *entry = NULL; + + list_for_each_entry(entry, entries, list) { + if (entry->bytenr == bytenr && entry->bytes == bytes) + return entry; + } + + return NULL; +} + +static struct extent_entry *find_most_right_entry(struct list_head *entries) +{ + struct extent_entry *entry, *best = NULL, *prev = NULL; + + list_for_each_entry(entry, entries, list) { + /* + * If there are as many broken entries as entries then we know + * not to trust this particular entry. 
+ */ + if (entry->broken == entry->count) + continue; + + /* + * Special case, when there are only two entries and 'best' is + * the first one + */ + if (!prev) { + best = entry; + prev = entry; + continue; + } + + /* + * If our current entry == best then we can't be sure our best + * is really the best, so we need to keep searching. + */ + if (best && best->count == entry->count) { + prev = entry; + best = NULL; + continue; + } + + /* Prev == entry, not good enough, have to keep searching */ + if (!prev->broken && prev->count == entry->count) + continue; + + if (!best) + best = (prev->count > entry->count) ? prev : entry; + else if (best->count < entry->count) + best = entry; + prev = entry; + } + + return best; +} + +static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path, + struct data_backref *dback, struct extent_entry *entry) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root; + struct btrfs_file_extent_item *fi; + struct extent_buffer *leaf; + struct btrfs_key key; + u64 bytenr, bytes; + int ret, err; + + key.objectid = dback->root; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + root = btrfs_read_fs_root(info, &key); + if (IS_ERR(root)) { + fprintf(stderr, "Couldn't find root for our ref\n"); + return -EINVAL; + } + + /* + * The backref points to the original offset of the extent if it was + * split, so we need to search down to the offset we have and then walk + * forward until we find the backref we're looking for. 
+ */ + key.objectid = dback->owner; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = dback->offset; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) { + fprintf(stderr, "Error looking up ref %d\n", ret); + return ret; + } + + while (1) { + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + ret = btrfs_next_leaf(root, path); + if (ret) { + fprintf(stderr, "Couldn't find our ref, next\n"); + return -EINVAL; + } + } + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.objectid != dback->owner || + key.type != BTRFS_EXTENT_DATA_KEY) { + fprintf(stderr, "Couldn't find our ref, search\n"); + return -EINVAL; + } + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); + bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); + + if (bytenr == dback->disk_bytenr && bytes == dback->bytes) + break; + path->slots[0]++; + } + + btrfs_release_path(path); + + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); + + /* + * Ok we have the key of the file extent we want to fix, now we can cow + * down to the thing and fix it. 
+ */ + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret < 0) { + fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n", + key.objectid, key.type, key.offset, ret); + goto out; + } + if (ret > 0) { + fprintf(stderr, "Well that's odd, we just found this key " + "[%Lu, %u, %Lu]\n", key.objectid, key.type, + key.offset); + ret = -EINVAL; + goto out; + } + leaf = path->nodes[0]; + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + + if (btrfs_file_extent_compression(leaf, fi) && + dback->disk_bytenr != entry->bytenr) { + fprintf(stderr, "Ref doesn't match the record start and is " + "compressed, please take a btrfs-image of this file " + "system and send it to a btrfs developer so they can " + "complete this functionality for bytenr %Lu\n", + dback->disk_bytenr); + ret = -EINVAL; + goto out; + } + + if (dback->node.broken && dback->disk_bytenr != entry->bytenr) { + btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr); + } else if (dback->disk_bytenr > entry->bytenr) { + u64 off_diff, offset; + + off_diff = dback->disk_bytenr - entry->bytenr; + offset = btrfs_file_extent_offset(leaf, fi); + if (dback->disk_bytenr + offset + + btrfs_file_extent_num_bytes(leaf, fi) > + entry->bytenr + entry->bytes) { + fprintf(stderr, "Ref is past the entry end, please " + "take a btrfs-image of this file system and " + "send it to a btrfs developer, ref %Lu\n", + dback->disk_bytenr); + ret = -EINVAL; + goto out; + } + offset += off_diff; + btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr); + btrfs_set_file_extent_offset(leaf, fi, offset); + } else if (dback->disk_bytenr < entry->bytenr) { + u64 offset; + + offset = btrfs_file_extent_offset(leaf, fi); + if (dback->disk_bytenr + offset < entry->bytenr) { + fprintf(stderr, "Ref is before the entry start, please" + " take a btrfs-image of this file system and " + "send it to a btrfs developer, ref %Lu\n", + dback->disk_bytenr); + ret = -EINVAL; + goto out; + } + + offset += 
dback->disk_bytenr; + offset -= entry->bytenr; + btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr); + btrfs_set_file_extent_offset(leaf, fi, offset); + } + + btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes); + + /* + * Chances are if disk_num_bytes were wrong then so is ram_bytes, but + * only do this if we aren't using compression, otherwise it's a + * trickier case. + */ + if (!btrfs_file_extent_compression(leaf, fi)) + btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes); + else + printf("ram bytes may be wrong?\n"); + btrfs_mark_buffer_dirty(leaf); +out: + err = btrfs_commit_transaction(trans, root); + btrfs_release_path(path); + return ret ? ret : err; +} + +static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path, + struct extent_record *rec) +{ + struct extent_backref *back; + struct data_backref *dback; + struct extent_entry *entry, *best = NULL; + LIST_HEAD(entries); + int nr_entries = 0; + int broken_entries = 0; + int ret = 0; + short mismatch = 0; + + /* + * Metadata is easy and the backrefs should always agree on bytenr and + * size, if not we've got bigger issues. + */ + if (rec->metadata) + return 0; + + list_for_each_entry(back, &rec->backrefs, list) { + if (back->full_backref || !back->is_data) + continue; + + dback = to_data_backref(back); + + /* + * We only pay attention to backrefs that we found a real + * backref for. + */ + if (dback->found_ref == 0) + continue; + + /* + * For now we only catch when the bytes don't match, not the + * bytenr. We can easily do this at the same time, but I want + * to have a fs image to test on before we just add repair + * functionality willy-nilly so we know we won't screw up the + * repair. 
+ */ + + entry = find_entry(&entries, dback->disk_bytenr, + dback->bytes); + if (!entry) { + entry = malloc(sizeof(struct extent_entry)); + if (!entry) { + ret = -ENOMEM; + goto out; + } + memset(entry, 0, sizeof(*entry)); + entry->bytenr = dback->disk_bytenr; + entry->bytes = dback->bytes; + list_add_tail(&entry->list, &entries); + nr_entries++; + } + + /* + * If we only have on entry we may think the entries agree when + * in reality they don't so we have to do some extra checking. + */ + if (dback->disk_bytenr != rec->start || + dback->bytes != rec->nr || back->broken) + mismatch = 1; + + if (back->broken) { + entry->broken++; + broken_entries++; + } + + entry->count++; + } + + /* Yay all the backrefs agree, carry on good sir */ + if (nr_entries <= 1 && !mismatch) + goto out; + + fprintf(stderr, "attempting to repair backref discrepency for bytenr " + "%Lu\n", rec->start); + + /* + * First we want to see if the backrefs can agree amongst themselves who + * is right, so figure out which one of the entries has the highest + * count. + */ + best = find_most_right_entry(&entries); + + /* + * Ok so we may have an even split between what the backrefs think, so + * this is where we use the extent ref to see what it thinks. + */ + if (!best) { + entry = find_entry(&entries, rec->start, rec->nr); + if (!entry && (!broken_entries || !rec->found_rec)) { + fprintf(stderr, "Backrefs don't agree with each other " + "and extent record doesn't agree with anybody," + " so we can't fix bytenr %Lu bytes %Lu\n", + rec->start, rec->nr); + ret = -EINVAL; + goto out; + } else if (!entry) { + /* + * Ok our backrefs were broken, we'll assume this is the + * correct value and add an entry for this range. 
+ */ + entry = malloc(sizeof(struct extent_entry)); + if (!entry) { + ret = -ENOMEM; + goto out; + } + memset(entry, 0, sizeof(*entry)); + entry->bytenr = rec->start; + entry->bytes = rec->nr; + list_add_tail(&entry->list, &entries); + nr_entries++; + } + entry->count++; + best = find_most_right_entry(&entries); + if (!best) { + fprintf(stderr, "Backrefs and extent record evenly " + "split on who is right, this is going to " + "require user input to fix bytenr %Lu bytes " + "%Lu\n", rec->start, rec->nr); + ret = -EINVAL; + goto out; + } + } + + /* + * I don't think this can happen currently as we'll abort() if we catch + * this case higher up, but in case somebody removes that we still can't + * deal with it properly here yet, so just bail out of that's the case. + */ + if (best->bytenr != rec->start) { + fprintf(stderr, "Extent start and backref starts don't match, " + "please use btrfs-image on this file system and send " + "it to a btrfs developer so they can make fsck fix " + "this particular case. bytenr is %Lu, bytes is %Lu\n", + rec->start, rec->nr); + ret = -EINVAL; + goto out; + } + + /* + * Ok great we all agreed on an extent record, let's go find the real + * references and fix up the ones that don't match. + */ + list_for_each_entry(back, &rec->backrefs, list) { + if (back->full_backref || !back->is_data) + continue; + + dback = to_data_backref(back); + + /* + * Still ignoring backrefs that don't have a real ref attached + * to them. + */ + if (dback->found_ref == 0) + continue; + + if (dback->bytes == best->bytes && + dback->disk_bytenr == best->bytenr) + continue; + + ret = repair_ref(info, path, dback, best); + if (ret) + goto out; + } + + /* + * Ok we messed with the actual refs, which means we need to drop our + * entire cache and go back and rescan. I know this is a huge pain and + * adds a lot of extra work, but it's the only way to be safe. Once all + * the backrefs agree we may not need to do anything to the extent + * record itself. 
+ */ + ret = -EAGAIN; +out: + while (!list_empty(&entries)) { + entry = list_entry(entries.next, struct extent_entry, list); + list_del_init(&entry->list); + free(entry); + } + return ret; +} + +static int process_duplicates(struct btrfs_root *root, + struct cache_tree *extent_cache, + struct extent_record *rec) +{ + struct extent_record *good, *tmp; + struct cache_extent *cache; + int ret; + + /* + * If we found a extent record for this extent then return, or if we + * have more than one duplicate we are likely going to need to delete + * something. + */ + if (rec->found_rec || rec->num_duplicates > 1) + return 0; + + /* Shouldn't happen but just in case */ + BUG_ON(!rec->num_duplicates); + + /* + * So this happens if we end up with a backref that doesn't match the + * actual extent entry. So either the backref is bad or the extent + * entry is bad. Either way we want to have the extent_record actually + * reflect what we found in the extent_tree, so we need to take the + * duplicate out and use that as the extent_record since the only way we + * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY. + */ + remove_cache_extent(extent_cache, &rec->cache); + + good = to_extent_record(rec->dups.next); + list_del_init(&good->list); + INIT_LIST_HEAD(&good->backrefs); + INIT_LIST_HEAD(&good->dups); + good->cache.start = good->start; + good->cache.size = good->nr; + good->content_checked = 0; + good->owner_ref_checked = 0; + good->num_duplicates = 0; + good->refs = rec->refs; + list_splice_init(&rec->backrefs, &good->backrefs); + while (1) { + cache = lookup_cache_extent(extent_cache, good->start, + good->nr); + if (!cache) + break; + tmp = container_of(cache, struct extent_record, cache); + + /* + * If we find another overlapping extent and it's found_rec is + * set then it's a duplicate and we need to try and delete + * something. 
+ */ + if (tmp->found_rec || tmp->num_duplicates > 0) { + if (list_empty(&good->list)) + list_add_tail(&good->list, + &duplicate_extents); + good->num_duplicates += tmp->num_duplicates + 1; + list_splice_init(&tmp->dups, &good->dups); + list_del_init(&tmp->list); + list_add_tail(&tmp->list, &good->dups); + remove_cache_extent(extent_cache, &tmp->cache); + continue; + } + + /* + * Ok we have another non extent item backed extent rec, so lets + * just add it to this extent and carry on like we did above. + */ + good->refs += tmp->refs; + list_splice_init(&tmp->backrefs, &good->backrefs); + remove_cache_extent(extent_cache, &tmp->cache); + free(tmp); + } + ret = insert_cache_extent(extent_cache, &good->cache); + BUG_ON(ret); + free(rec); + return good->num_duplicates ? 0 : 1; +} + +static int delete_duplicate_records(struct btrfs_root *root, + struct extent_record *rec) +{ + struct btrfs_trans_handle *trans; + LIST_HEAD(delete_list); + struct btrfs_path path; + struct extent_record *tmp, *good, *n; + int nr_del = 0; + int ret = 0, err; + struct btrfs_key key; + + btrfs_init_path(&path); + + good = rec; + /* Find the record that covers all of the duplicates. */ + list_for_each_entry(tmp, &rec->dups, list) { + if (good->start < tmp->start) + continue; + if (good->nr > tmp->nr) + continue; + + if (tmp->start + tmp->nr < good->start + good->nr) { + fprintf(stderr, "Ok we have overlapping extents that " + "aren't completely covered by each other, this " + "is going to require more careful thought. 
" + "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n", + tmp->start, tmp->nr, good->start, good->nr); + abort(); + } + good = tmp; + } + + if (good != rec) + list_add_tail(&rec->list, &delete_list); + + list_for_each_entry_safe(tmp, n, &rec->dups, list) { + if (tmp == good) + continue; + list_move_tail(&tmp->list, &delete_list); + } + + root = root->fs_info->extent_root; + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + + list_for_each_entry(tmp, &delete_list, list) { + if (tmp->found_rec == 0) + continue; + key.objectid = tmp->start; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = tmp->nr; + + /* Shouldn't happen but just in case */ + if (tmp->metadata) { + fprintf(stderr, "Well this shouldn't happen, extent " + "record overlaps but is metadata? " + "[%Lu, %Lu]\n", tmp->start, tmp->nr); + abort(); + } + + ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); + if (ret) { + if (ret > 0) + ret = -EINVAL; + break; + } + ret = btrfs_del_item(trans, root, &path); + if (ret) + break; + btrfs_release_path(&path); + nr_del++; + } + err = btrfs_commit_transaction(trans, root); + if (err && !ret) + ret = err; +out: + while (!list_empty(&delete_list)) { + tmp = to_extent_record(delete_list.next); + list_del_init(&tmp->list); + if (tmp == rec) + continue; + free(tmp); + } + + while (!list_empty(&rec->dups)) { + tmp = to_extent_record(rec->dups.next); + list_del_init(&tmp->list); + free(tmp); + } + + btrfs_release_path(&path); + + if (!ret && !nr_del) + rec->num_duplicates = 0; + + return ret ? 
ret : nr_del; +} + +static int find_possible_backrefs(struct btrfs_fs_info *info, + struct btrfs_path *path, + struct cache_tree *extent_cache, + struct extent_record *rec) +{ + struct btrfs_root *root; + struct extent_backref *back; + struct data_backref *dback; + struct cache_extent *cache; + struct btrfs_file_extent_item *fi; + struct btrfs_key key; + u64 bytenr, bytes; + int ret; + + list_for_each_entry(back, &rec->backrefs, list) { + /* Don't care about full backrefs (poor unloved backrefs) */ + if (back->full_backref || !back->is_data) + continue; + + dback = to_data_backref(back); + + /* We found this one, we don't need to do a lookup */ + if (dback->found_ref) + continue; + + key.objectid = dback->root; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + + root = btrfs_read_fs_root(info, &key); + + /* No root, definitely a bad ref, skip */ + if (IS_ERR(root) && PTR_ERR(root) == -ENOENT) + continue; + /* Other err, exit */ + if (IS_ERR(root)) + return PTR_ERR(root); + + key.objectid = dback->owner; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = dback->offset; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret) { + btrfs_release_path(path); + if (ret < 0) + return ret; + /* Didn't find it, we can carry on */ + ret = 0; + continue; + } + + fi = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi); + bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi); + btrfs_release_path(path); + cache = lookup_cache_extent(extent_cache, bytenr, 1); + if (cache) { + struct extent_record *tmp; + tmp = container_of(cache, struct extent_record, cache); + + /* + * If we found an extent record for the bytenr for this + * particular backref then we can't add it to our + * current extent record. 
We only want to add backrefs + * that don't have a corresponding extent item in the + * extent tree since they likely belong to this record + * and we need to fix it if it doesn't match bytenrs. + */ + if (tmp->found_rec) + continue; + } + + dback->found_ref += 1; + dback->disk_bytenr = bytenr; + dback->bytes = bytes; + + /* + * Set this so the verify backref code knows not to trust the + * values in this backref. + */ + back->broken = 1; + } + + return 0; +} + +/* + * Record orphan data refs of @rec into the roots they point at. + * + * A data backref is recorded when it has found_extent_tree set, has no + * found_ref, its root can be read, and looking up its + * (owner, EXTENT_DATA, offset) key in that root is not an exact match. + * Each recorded ref becomes an orphan_data_extent linked on the root's + * orphan_data_extents list. + * + * Return 0 if the extent item contains data ref and recorded. + * Return 1 if the extent item contains no useful data ref. + * In that case it may contain only shared_dataref or metadata backref, + * or the file extent exists (this should be handled by the extent bytenr + * recovery routine). + * Return <0 if something goes wrong. + */ +static int record_orphan_data_extents(struct btrfs_fs_info *fs_info, + struct extent_record *rec) +{ + struct btrfs_key key; + struct btrfs_root *dest_root; + struct extent_backref *back; + struct data_backref *dback; + struct orphan_data_extent *orphan; + struct btrfs_path path; + int recorded_data_ref = 0; + int ret = 0; + + if (rec->metadata) + return 1; + btrfs_init_path(&path); + list_for_each_entry(back, &rec->backrefs, list) { + if (back->full_backref || !back->is_data || + !back->found_extent_tree) + continue; + dback = to_data_backref(back); + if (dback->found_ref) + continue; + key.objectid = dback->root; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + + dest_root = btrfs_read_fs_root(fs_info, &key); + + /* For non-exist root we just skip it */ + if (IS_ERR(dest_root) || !dest_root) + continue; + + key.objectid = dback->owner; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = dback->offset; + + ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0); + btrfs_release_path(&path); + /* + * For ret < 0, it's OK since the fs-tree may be corrupted, + * we need to record it for inode/file extent 
rebuild. + * For ret > 0, we record it only for file extent rebuild. + * For ret == 0, the file extent exists but only bytenr + * mismatch, let the original bytenr fix routine to handle, + * don't record it. + */ + if (ret == 0) + continue; + ret = 0; + orphan = malloc(sizeof(*orphan)); + if (!orphan) { + ret = -ENOMEM; + goto out; + } + INIT_LIST_HEAD(&orphan->list); + orphan->root = dback->root; + orphan->objectid = dback->owner; + orphan->offset = dback->offset; + orphan->disk_bytenr = rec->cache.start; + orphan->disk_len = rec->cache.size; + /* + * New node first, list head second (same order as the + * list_add_tail() calls in this file). With the arguments + * swapped the root's list head is re-linked onto the new + * orphan and previously recorded orphans fall off the list. + */ + list_add(&orphan->list, &dest_root->orphan_data_extents); + recorded_data_ref = 1; + } +out: + btrfs_release_path(&path); + if (!ret) + return !recorded_data_ref; + else + return ret; +} + +/* + * when an incorrect extent item is found, this will delete + * all of the existing entries for it and recreate them + * based on what the tree scan found. + */ +static int fixup_extent_refs(struct btrfs_fs_info *info, + struct cache_tree *extent_cache, + struct extent_record *rec) +{ + struct btrfs_trans_handle *trans = NULL; + int ret; + struct btrfs_path path; + struct list_head *cur = rec->backrefs.next; + struct cache_extent *cache; + struct extent_backref *back; + int allocated = 0; + u64 flags = 0; + + if (rec->flag_block_full_backref) + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + + btrfs_init_path(&path); + if (rec->refs != rec->extent_item_refs && !rec->metadata) { + /* + * Sometimes the backrefs themselves are so broken they don't + * get attached to any meaningful rec, so first go back and + * check any of our backrefs that we couldn't find and throw + * them into the list if we find the backref so that + * verify_backrefs can figure out what to do. 
+ */ + ret = find_possible_backrefs(info, &path, extent_cache, rec); + if (ret < 0) + goto out; + } + + /* step one, make sure all of the backrefs agree */ + ret = verify_backrefs(info, &path, rec); + if (ret < 0) + goto out; + + trans = btrfs_start_transaction(info->extent_root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + + /* step two, delete all the existing records */ + ret = delete_extent_records(trans, info->extent_root, &path, + rec->start, rec->max_size); + + if (ret < 0) + goto out; + + /* was this block corrupt? If so, don't add references to it */ + cache = lookup_cache_extent(info->corrupt_blocks, + rec->start, rec->max_size); + if (cache) { + ret = 0; + goto out; + } + + /* step three, recreate all the refs we did find */ + while(cur != &rec->backrefs) { + back = to_extent_backref(cur); + cur = cur->next; + + /* + * if we didn't find any references, don't create a + * new extent record + */ + if (!back->found_ref) + continue; + + rec->bad_full_backref = 0; + ret = record_extent(trans, info, &path, rec, back, allocated, flags); + allocated = 1; + + if (ret) + goto out; + } +out: + if (trans) { + int err = btrfs_commit_transaction(trans, info->extent_root); + if (!ret) + ret = err; + } + + btrfs_release_path(&path); + return ret; +} + +static int fixup_extent_flags(struct btrfs_fs_info *fs_info, + struct extent_record *rec) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = fs_info->extent_root; + struct btrfs_path path; + struct btrfs_extent_item *ei; + struct btrfs_key key; + u64 flags; + int ret = 0; + + key.objectid = rec->start; + if (rec->metadata) { + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = rec->info_level; + } else { + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = rec->max_size; + } + + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) + return PTR_ERR(trans); + + btrfs_init_path(&path); + ret = btrfs_search_slot(trans, root, &key, &path, 0, 1); + if (ret < 0) { + 
btrfs_release_path(&path); + btrfs_commit_transaction(trans, root); + return ret; + } else if (ret) { + fprintf(stderr, "Didn't find extent for %llu\n", + (unsigned long long)rec->start); + btrfs_release_path(&path); + btrfs_commit_transaction(trans, root); + return -ENOENT; + } + + ei = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_extent_item); + flags = btrfs_extent_flags(path.nodes[0], ei); + if (rec->flag_block_full_backref) { + fprintf(stderr, "setting full backref on %llu\n", + (unsigned long long)key.objectid); + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + } else { + fprintf(stderr, "clearing full backref on %llu\n", + (unsigned long long)key.objectid); + flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF; + } + btrfs_set_extent_flags(path.nodes[0], ei, flags); + btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_release_path(&path); + return btrfs_commit_transaction(trans, root); +} + +/* right now we only prune from the extent allocation tree */ +static int prune_one_block(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *info, + struct btrfs_corrupt_block *corrupt) +{ + int ret; + struct btrfs_path path; + struct extent_buffer *eb; + u64 found; + int slot; + int nritems; + int level = corrupt->level + 1; + + btrfs_init_path(&path); +again: + /* we want to stop at the parent to our busted block */ + path.lowest_level = level; + + ret = btrfs_search_slot(trans, info->extent_root, + &corrupt->key, &path, -1, 1); + + if (ret < 0) + goto out; + + eb = path.nodes[level]; + if (!eb) { + ret = -ENOENT; + goto out; + } + + /* + * hopefully the search gave us the block we want to prune, + * lets try that first + */ + slot = path.slots[level]; + found = btrfs_node_blockptr(eb, slot); + if (found == corrupt->cache.start) + goto del_ptr; + + nritems = btrfs_header_nritems(eb); + + /* the search failed, lets scan this node and hope we find it */ + for (slot = 0; slot < nritems; slot++) { + found = btrfs_node_blockptr(eb, slot); + if (found == 
corrupt->cache.start) + goto del_ptr; + } + /* + * we couldn't find the bad block. TODO, search all the nodes for pointers + * to this block + */ + if (eb == info->extent_root->node) { + ret = -ENOENT; + goto out; + } else { + level++; + btrfs_release_path(&path); + goto again; + } + +del_ptr: + printk("deleting pointer to block %Lu\n", corrupt->cache.start); + ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot); + +out: + btrfs_release_path(&path); + return ret; +} + +static int prune_corrupt_blocks(struct btrfs_fs_info *info) +{ + struct btrfs_trans_handle *trans = NULL; + struct cache_extent *cache; + struct btrfs_corrupt_block *corrupt; + + while (1) { + cache = search_cache_extent(info->corrupt_blocks, 0); + if (!cache) + break; + if (!trans) { + trans = btrfs_start_transaction(info->extent_root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); + } + corrupt = container_of(cache, struct btrfs_corrupt_block, cache); + prune_one_block(trans, info, corrupt); + remove_cache_extent(info->corrupt_blocks, cache); + } + if (trans) + return btrfs_commit_transaction(trans, info->extent_root); + return 0; +} + +static void reset_cached_block_groups(struct btrfs_fs_info *fs_info) +{ + struct btrfs_block_group_cache *cache; + u64 start, end; + int ret; + + while (1) { + ret = find_first_extent_bit(&fs_info->free_space_cache, 0, + &start, &end, EXTENT_DIRTY); + if (ret) + break; + clear_extent_dirty(&fs_info->free_space_cache, start, end, + GFP_NOFS); + } + + start = 0; + while (1) { + cache = btrfs_lookup_first_block_group(fs_info, start); + if (!cache) + break; + if (cache->cached) + cache->cached = 0; + start = cache->key.objectid + cache->key.offset; + } +} + +static int check_extent_refs(struct btrfs_root *root, + struct cache_tree *extent_cache) +{ + struct extent_record *rec; + struct cache_extent *cache; + int err = 0; + int ret = 0; + int fixed = 0; + int had_dups = 0; + int recorded = 0; + + if (repair) { + /* + * if we're doing a repair, we have to 
make sure + * we don't allocate from the problem extents. + * In the worst case, this will be all the + * extents in the FS + */ + cache = search_cache_extent(extent_cache, 0); + while(cache) { + rec = container_of(cache, struct extent_record, cache); + set_extent_dirty(root->fs_info->excluded_extents, + rec->start, + rec->start + rec->max_size - 1, + GFP_NOFS); + cache = next_cache_extent(cache); + } + + /* pin down all the corrupted blocks too */ + cache = search_cache_extent(root->fs_info->corrupt_blocks, 0); + while(cache) { + set_extent_dirty(root->fs_info->excluded_extents, + cache->start, + cache->start + cache->size - 1, + GFP_NOFS); + cache = next_cache_extent(cache); + } + prune_corrupt_blocks(root->fs_info); + reset_cached_block_groups(root->fs_info); + } + + reset_cached_block_groups(root->fs_info); + + /* + * We need to delete any duplicate entries we find first otherwise we + * could mess up the extent tree when we have backrefs that actually + * belong to a different extent item and not the weird duplicate one. + */ + while (repair && !list_empty(&duplicate_extents)) { + rec = to_extent_record(duplicate_extents.next); + list_del_init(&rec->list); + + /* Sometimes we can find a backref before we find an actual + * extent, so we need to process it a little bit to see if there + * truly are multiple EXTENT_ITEM_KEY's for the same range, or + * if this is a backref screwup. If we need to delete stuff + * process_duplicates() will return 0, otherwise it will return + * 1 and we + */ + if (process_duplicates(root, extent_cache, rec)) + continue; + ret = delete_duplicate_records(root, rec); + if (ret < 0) + return ret; + /* + * delete_duplicate_records will return the number of entries + * deleted, so if it's greater than 0 then we know we actually + * did something and we need to remove. 
+ */ + if (ret) + had_dups = 1; + } + + if (had_dups) + return -EAGAIN; + + while(1) { + int cur_err = 0; + + fixed = 0; + recorded = 0; + cache = search_cache_extent(extent_cache, 0); + if (!cache) + break; + rec = container_of(cache, struct extent_record, cache); + if (rec->num_duplicates) { + fprintf(stderr, "extent item %llu has multiple extent " + "items\n", (unsigned long long)rec->start); + err = 1; + cur_err = 1; + } + + if (rec->refs != rec->extent_item_refs) { + fprintf(stderr, "ref mismatch on [%llu %llu] ", + (unsigned long long)rec->start, + (unsigned long long)rec->nr); + fprintf(stderr, "extent item %llu, found %llu\n", + (unsigned long long)rec->extent_item_refs, + (unsigned long long)rec->refs); + ret = record_orphan_data_extents(root->fs_info, rec); + if (ret < 0) + goto repair_abort; + if (ret == 0) { + recorded = 1; + } else { + /* + * we can't use the extent to repair file + * extent, let the fallback method handle it. + */ + if (!fixed && repair) { + ret = fixup_extent_refs( + root->fs_info, + extent_cache, rec); + if (ret) + goto repair_abort; + fixed = 1; + } + } + err = 1; + cur_err = 1; + } + if (all_backpointers_checked(rec, 1)) { + fprintf(stderr, "backpointer mismatch on [%llu %llu]\n", + (unsigned long long)rec->start, + (unsigned long long)rec->nr); + + if (!fixed && !recorded && repair) { + ret = fixup_extent_refs(root->fs_info, + extent_cache, rec); + if (ret) + goto repair_abort; + fixed = 1; + } + cur_err = 1; + err = 1; + } + if (!rec->owner_ref_checked) { + fprintf(stderr, "owner ref check failed [%llu %llu]\n", + (unsigned long long)rec->start, + (unsigned long long)rec->nr); + if (!fixed && !recorded && repair) { + ret = fixup_extent_refs(root->fs_info, + extent_cache, rec); + if (ret) + goto repair_abort; + fixed = 1; + } + err = 1; + cur_err = 1; + } + if (rec->bad_full_backref) { + fprintf(stderr, "bad full backref, on [%llu]\n", + (unsigned long long)rec->start); + if (repair) { + ret = fixup_extent_flags(root->fs_info, 
rec); + if (ret) + goto repair_abort; + fixed = 1; + } + err = 1; + cur_err = 1; + } + /* + * Although it's not a extent ref's problem, we reuse this + * routine for error reporting. + * No repair function yet. + */ + if (rec->crossing_stripes) { + fprintf(stderr, + "bad metadata [%llu, %llu) crossing stripe boundary\n", + rec->start, rec->start + rec->max_size); + err = 1; + cur_err = 1; + } + + if (rec->wrong_chunk_type) { + fprintf(stderr, + "bad extent [%llu, %llu), type mismatch with chunk\n", + rec->start, rec->start + rec->max_size); + err = 1; + cur_err = 1; + } + + remove_cache_extent(extent_cache, cache); + free_all_extent_backrefs(rec); + if (!init_extent_tree && repair && (!cur_err || fixed)) + clear_extent_dirty(root->fs_info->excluded_extents, + rec->start, + rec->start + rec->max_size - 1, + GFP_NOFS); + free(rec); + } +repair_abort: + if (repair) { + if (ret && ret != -EAGAIN) { + fprintf(stderr, "failed to repair damaged filesystem, aborting\n"); + exit(1); + } else if (!ret) { + struct btrfs_trans_handle *trans; + + root = root->fs_info->extent_root; + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto repair_abort; + } + + btrfs_fix_block_accounting(trans, root); + ret = btrfs_commit_transaction(trans, root); + if (ret) + goto repair_abort; + } + if (err) + fprintf(stderr, "repaired damaged extent references\n"); + return ret; + } + return err; +} + +u64 calc_stripe_length(u64 type, u64 length, int num_stripes) +{ + u64 stripe_size; + + if (type & BTRFS_BLOCK_GROUP_RAID0) { + stripe_size = length; + stripe_size /= num_stripes; + } else if (type & BTRFS_BLOCK_GROUP_RAID10) { + stripe_size = length * 2; + stripe_size /= num_stripes; + } else if (type & BTRFS_BLOCK_GROUP_RAID5) { + stripe_size = length; + stripe_size /= (num_stripes - 1); + } else if (type & BTRFS_BLOCK_GROUP_RAID6) { + stripe_size = length; + stripe_size /= (num_stripes - 2); + } else { + stripe_size = length; + } + return stripe_size; 
+} + +/* + * Check the chunk with its block group/dev list ref: + * Return 0 if all refs seems valid. + * Return 1 if part of refs seems valid, need later check for rebuild ref + * like missing block group and needs to search extent tree to rebuild them. + * Return -1 if essential refs are missing and unable to rebuild. + */ +static int check_chunk_refs(struct chunk_record *chunk_rec, + struct block_group_tree *block_group_cache, + struct device_extent_tree *dev_extent_cache, + int silent) +{ + struct cache_extent *block_group_item; + struct block_group_record *block_group_rec; + struct cache_extent *dev_extent_item; + struct device_extent_record *dev_extent_rec; + u64 devid; + u64 offset; + u64 length; + int metadump_v2 = 0; + int i; + int ret = 0; + + block_group_item = lookup_cache_extent(&block_group_cache->tree, + chunk_rec->offset, + chunk_rec->length); + if (block_group_item) { + block_group_rec = container_of(block_group_item, + struct block_group_record, + cache); + if (chunk_rec->length != block_group_rec->offset || + chunk_rec->offset != block_group_rec->objectid || + (!metadump_v2 && + chunk_rec->type_flags != block_group_rec->flags)) { + if (!silent) + fprintf(stderr, + "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n", + chunk_rec->objectid, + chunk_rec->type, + chunk_rec->offset, + chunk_rec->length, + chunk_rec->offset, + chunk_rec->type_flags, + block_group_rec->objectid, + block_group_rec->type, + block_group_rec->offset, + block_group_rec->offset, + block_group_rec->objectid, + block_group_rec->flags); + ret = -1; + } else { + list_del_init(&block_group_rec->list); + chunk_rec->bg_rec = block_group_rec; + } + } else { + if (!silent) + fprintf(stderr, + "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n", + chunk_rec->objectid, + chunk_rec->type, + chunk_rec->offset, + chunk_rec->length, + 
chunk_rec->offset, + chunk_rec->type_flags); + ret = 1; + } + + if (metadump_v2) + return ret; + + length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length, + chunk_rec->num_stripes); + for (i = 0; i < chunk_rec->num_stripes; ++i) { + devid = chunk_rec->stripes[i].devid; + offset = chunk_rec->stripes[i].offset; + dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree, + devid, offset, length); + if (dev_extent_item) { + dev_extent_rec = container_of(dev_extent_item, + struct device_extent_record, + cache); + if (dev_extent_rec->objectid != devid || + dev_extent_rec->offset != offset || + dev_extent_rec->chunk_offset != chunk_rec->offset || + dev_extent_rec->length != length) { + if (!silent) + fprintf(stderr, + "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n", + chunk_rec->objectid, + chunk_rec->type, + chunk_rec->offset, + chunk_rec->stripes[i].devid, + chunk_rec->stripes[i].offset, + dev_extent_rec->objectid, + dev_extent_rec->offset, + dev_extent_rec->length); + ret = -1; + } else { + list_move(&dev_extent_rec->chunk_list, + &chunk_rec->dextents); + } + } else { + if (!silent) + fprintf(stderr, + "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n", + chunk_rec->objectid, + chunk_rec->type, + chunk_rec->offset, + chunk_rec->stripes[i].devid, + chunk_rec->stripes[i].offset); + ret = -1; + } + } + return ret; +} + +/* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */ +int check_chunks(struct cache_tree *chunk_cache, + struct block_group_tree *block_group_cache, + struct device_extent_tree *dev_extent_cache, + struct list_head *good, struct list_head *bad, + struct list_head *rebuild, int silent) +{ + struct cache_extent *chunk_item; + struct chunk_record *chunk_rec; + struct block_group_record *bg_rec; + struct device_extent_record *dext_rec; + int err; + int ret = 0; + + chunk_item = first_cache_extent(chunk_cache); + while (chunk_item) { + chunk_rec = 
container_of(chunk_item, struct chunk_record, + cache); + err = check_chunk_refs(chunk_rec, block_group_cache, + dev_extent_cache, silent); + if (err < 0) + ret = err; + if (err == 0 && good) + list_add_tail(&chunk_rec->list, good); + if (err > 0 && rebuild) + list_add_tail(&chunk_rec->list, rebuild); + if (err < 0 && bad) + list_add_tail(&chunk_rec->list, bad); + chunk_item = next_cache_extent(chunk_item); } - while (!list_empty(&rec->dups)) { - tmp = list_entry(rec->dups.next, struct extent_record, list); - list_del_init(&tmp->list); - free(tmp); + list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) { + if (!silent) + fprintf(stderr, + "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n", + bg_rec->objectid, + bg_rec->offset, + bg_rec->flags); + if (!ret) + ret = 1; } - btrfs_free_path(path); - - if (!ret && !nr_del) - rec->num_duplicates = 0; - - return ret ? ret : nr_del; + list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans, + chunk_list) { + if (!silent) + fprintf(stderr, + "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n", + dext_rec->objectid, + dext_rec->offset, + dext_rec->length); + if (!ret) + ret = 1; + } + return ret; } -static int find_possible_backrefs(struct btrfs_fs_info *info, - struct btrfs_path *path, - struct cache_tree *extent_cache, - struct extent_record *rec) + +static int check_device_used(struct device_record *dev_rec, + struct device_extent_tree *dext_cache) { - struct btrfs_root *root; - struct extent_backref *back; - struct data_backref *dback; struct cache_extent *cache; - struct btrfs_file_extent_item *fi; - struct btrfs_key key; - u64 bytenr, bytes; - int ret; + struct device_extent_record *dev_extent_rec; + u64 total_byte = 0; - list_for_each_entry(back, &rec->backrefs, list) { - /* Don't care about full backrefs (poor unloved backrefs) */ - if (back->full_backref || !back->is_data) - continue; + cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 
0); + while (cache) { + dev_extent_rec = container_of(cache, + struct device_extent_record, + cache); + if (dev_extent_rec->objectid != dev_rec->devid) + break; - dback = (struct data_backref *)back; + list_del_init(&dev_extent_rec->device_list); + total_byte += dev_extent_rec->length; + cache = next_cache_extent(cache); + } - /* We found this one, we don't need to do a lookup */ - if (dback->found_ref) - continue; + if (total_byte != dev_rec->byte_used) { + fprintf(stderr, + "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n", + total_byte, dev_rec->byte_used, dev_rec->objectid, + dev_rec->type, dev_rec->offset); + return -1; + } else { + return 0; + } +} - key.objectid = dback->root; - key.type = BTRFS_ROOT_ITEM_KEY; - key.offset = (u64)-1; +/* check btrfs_dev_item -> btrfs_dev_extent */ +static int check_devices(struct rb_root *dev_cache, + struct device_extent_tree *dev_extent_cache) +{ + struct rb_node *dev_node; + struct device_record *dev_rec; + struct device_extent_record *dext_rec; + int err; + int ret = 0; - root = btrfs_read_fs_root(info, &key); + dev_node = rb_first(dev_cache); + while (dev_node) { + dev_rec = container_of(dev_node, struct device_record, node); + err = check_device_used(dev_rec, dev_extent_cache); + if (err) + ret = err; - /* No root, definitely a bad ref, skip */ - if (IS_ERR(root) && PTR_ERR(root) == -ENOENT) - continue; - /* Other err, exit */ - if (IS_ERR(root)) - return PTR_ERR(root); + dev_node = rb_next(dev_node); + } + list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans, + device_list) { + fprintf(stderr, + "Device extent[%llu, %llu, %llu] didn't find its device.\n", + dext_rec->objectid, dext_rec->offset, dext_rec->length); + if (!ret) + ret = 1; + } + return ret; +} - key.objectid = dback->owner; - key.type = BTRFS_EXTENT_DATA_KEY; - key.offset = dback->offset; - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret) { - btrfs_release_path(path); - if (ret < 0) - 
return ret; - /* Didn't find it, we can carry on */ - ret = 0; - continue; - } +static int add_root_item_to_list(struct list_head *head, + u64 objectid, u64 bytenr, u64 last_snapshot, + u8 level, u8 drop_level, + int level_size, struct btrfs_key *drop_key) +{ - fi = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_file_extent_item); - bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi); - bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi); - btrfs_release_path(path); - cache = lookup_cache_extent(extent_cache, bytenr, 1); - if (cache) { - struct extent_record *tmp; - tmp = container_of(cache, struct extent_record, cache); + struct root_item_record *ri_rec; + ri_rec = malloc(sizeof(*ri_rec)); + if (!ri_rec) + return -ENOMEM; + ri_rec->bytenr = bytenr; + ri_rec->objectid = objectid; + ri_rec->level = level; + ri_rec->level_size = level_size; + ri_rec->drop_level = drop_level; + ri_rec->last_snapshot = last_snapshot; + if (drop_key) + memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key)); + list_add_tail(&ri_rec->list, head); - /* - * If we found an extent record for the bytenr for this - * particular backref then we can't add it to our - * current extent record. We only want to add backrefs - * that don't have a corresponding extent item in the - * extent tree since they likely belong to this record - * and we need to fix it if it doesn't match bytenrs. - */ - if (tmp->found_rec) - continue; - } + return 0; +} - dback->found_ref += 1; - dback->disk_bytenr = bytenr; - dback->bytes = bytes; +static void free_root_item_list(struct list_head *list) +{ + struct root_item_record *ri_rec; - /* - * Set this so the verify backref code knows not to trust the - * values in this backref. - */ - back->broken = 1; + while (!list_empty(list)) { + ri_rec = list_first_entry(list, struct root_item_record, + list); + list_del_init(&ri_rec->list); + free(ri_rec); } - - return 0; } -/* - * Record orphan data ref into corresponding root. 
- * - * Return 0 if the extent item contains data ref and recorded. - * Return 1 if the extent item contains no useful data ref - * On that case, it may contains only shared_dataref or metadata backref - * or the file extent exists(this should be handled by the extent bytenr - * recovery routine) - * Return <0 if something goes wrong. - */ -static int record_orphan_data_extents(struct btrfs_fs_info *fs_info, - struct extent_record *rec) +static int deal_root_from_list(struct list_head *list, + struct btrfs_root *root, + struct block_info *bits, + int bits_nr, + struct cache_tree *pending, + struct cache_tree *seen, + struct cache_tree *reada, + struct cache_tree *nodes, + struct cache_tree *extent_cache, + struct cache_tree *chunk_cache, + struct rb_root *dev_cache, + struct block_group_tree *block_group_cache, + struct device_extent_tree *dev_extent_cache) { - struct btrfs_key key; - struct btrfs_root *dest_root; - struct extent_backref *back; - struct data_backref *dback; - struct orphan_data_extent *orphan; - struct btrfs_path *path; - int recorded_data_ref = 0; int ret = 0; + u64 last; - if (rec->metadata) - return 1; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - list_for_each_entry(back, &rec->backrefs, list) { - if (back->full_backref || !back->is_data || - !back->found_extent_tree) - continue; - dback = (struct data_backref *)back; - if (dback->found_ref) - continue; - key.objectid = dback->root; - key.type = BTRFS_ROOT_ITEM_KEY; - key.offset = (u64)-1; - - dest_root = btrfs_read_fs_root(fs_info, &key); - - /* For non-exist root we just skip it */ - if (IS_ERR(dest_root) || !dest_root) - continue; - - key.objectid = dback->owner; - key.type = BTRFS_EXTENT_DATA_KEY; - key.offset = dback->offset; - - ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0); - /* - * For ret < 0, it's OK since the fs-tree may be corrupted, - * we need to record it for inode/file extent rebuild. - * For ret > 0, we record it only for file extent rebuild. 
- * For ret == 0, the file extent exists but only bytenr - * mismatch, let the original bytenr fix routine to handle, - * don't record it. + while (!list_empty(list)) { + struct root_item_record *rec; + struct extent_buffer *buf; + rec = list_entry(list->next, + struct root_item_record, list); + last = 0; + buf = read_tree_block(root->fs_info->tree_root, + rec->bytenr, rec->level_size, 0); + if (!extent_buffer_uptodate(buf)) { + free_extent_buffer(buf); + ret = -EIO; + break; + } + ret = add_root_to_pending(buf, extent_cache, pending, + seen, nodes, rec->objectid); + if (ret < 0) + break; + /* + * To rebuild extent tree, we need deal with snapshot + * one by one, otherwise we deal with node firstly which + * can maximize readahead. */ - if (ret == 0) - continue; - ret = 0; - orphan = malloc(sizeof(*orphan)); - if (!orphan) { - ret = -ENOMEM; - goto out; + while (1) { + ret = run_next_block(root, bits, bits_nr, &last, + pending, seen, reada, nodes, + extent_cache, chunk_cache, + dev_cache, block_group_cache, + dev_extent_cache, rec); + if (ret != 0) + break; } - INIT_LIST_HEAD(&orphan->list); - orphan->root = dback->root; - orphan->objectid = dback->owner; - orphan->offset = dback->offset; - orphan->disk_bytenr = rec->cache.start; - orphan->disk_len = rec->cache.size; - list_add(&dest_root->orphan_data_extents, &orphan->list); - recorded_data_ref = 1; + free_extent_buffer(buf); + list_del(&rec->list); + free(rec); + if (ret < 0) + break; } -out: - btrfs_free_path(path); - if (!ret) - return !recorded_data_ref; - else - return ret; + while (ret >= 0) { + ret = run_next_block(root, bits, bits_nr, &last, pending, seen, + reada, nodes, extent_cache, chunk_cache, + dev_cache, block_group_cache, + dev_extent_cache, NULL); + if (ret != 0) { + if (ret > 0) + ret = 0; + break; + } + } + return ret; } -/* - * when an incorrect extent item is found, this will delete - * all of the existing entries for it and recreate them - * based on what the tree scan found. 
- */ -static int fixup_extent_refs(struct btrfs_fs_info *info, - struct cache_tree *extent_cache, - struct extent_record *rec) +static int check_chunks_and_extents(struct btrfs_root *root) { - struct btrfs_trans_handle *trans = NULL; - int ret; - struct btrfs_path *path; - struct list_head *cur = rec->backrefs.next; - struct cache_extent *cache; - struct extent_backref *back; - int allocated = 0; - u64 flags = 0; + struct rb_root dev_cache; + struct cache_tree chunk_cache; + struct block_group_tree block_group_cache; + struct device_extent_tree dev_extent_cache; + struct cache_tree extent_cache; + struct cache_tree seen; + struct cache_tree pending; + struct cache_tree reada; + struct cache_tree nodes; + struct extent_io_tree excluded_extents; + struct cache_tree corrupt_blocks; + struct btrfs_path path; + struct btrfs_key key; + struct btrfs_key found_key; + int ret, err = 0; + struct block_info *bits; + int bits_nr; + struct extent_buffer *leaf; + int slot; + struct btrfs_root_item ri; + struct list_head dropping_trees; + struct list_head normal_trees; + struct btrfs_root *root1; + u64 objectid; + u32 level_size; + u8 level; - if (rec->flag_block_full_backref) - flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + dev_cache = RB_ROOT; + cache_tree_init(&chunk_cache); + block_group_tree_init(&block_group_cache); + device_extent_tree_init(&dev_extent_cache); - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + cache_tree_init(&extent_cache); + cache_tree_init(&seen); + cache_tree_init(&pending); + cache_tree_init(&nodes); + cache_tree_init(&reada); + cache_tree_init(&corrupt_blocks); + extent_io_tree_init(&excluded_extents); + INIT_LIST_HEAD(&dropping_trees); + INIT_LIST_HEAD(&normal_trees); - if (rec->refs != rec->extent_item_refs && !rec->metadata) { - /* - * Sometimes the backrefs themselves are so broken they don't - * get attached to any meaningful rec, so first go back and - * check any of our backrefs that we couldn't find and throw - * them into the list if 
we find the backref so that - * verify_backrefs can figure out what to do. - */ - ret = find_possible_backrefs(info, path, extent_cache, rec); - if (ret < 0) - goto out; + if (repair) { + root->fs_info->excluded_extents = &excluded_extents; + root->fs_info->fsck_extent_cache = &extent_cache; + root->fs_info->free_extent_hook = free_extent_hook; + root->fs_info->corrupt_blocks = &corrupt_blocks; } - /* step one, make sure all of the backrefs agree */ - ret = verify_backrefs(info, path, rec); - if (ret < 0) - goto out; - - trans = btrfs_start_transaction(info->extent_root, 1); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto out; + bits_nr = 1024; + bits = malloc(bits_nr * sizeof(struct block_info)); + if (!bits) { + perror("malloc"); + exit(1); } - /* step two, delete all the existing records */ - ret = delete_extent_records(trans, info->extent_root, path, - rec->start, rec->max_size); + if (ctx.progress_enabled) { + ctx.tp = TASK_EXTENTS; + task_start(ctx.info); + } +again: + root1 = root->fs_info->tree_root; + level = btrfs_header_level(root1->node); + ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid, + root1->node->start, 0, level, 0, + root1->nodesize, NULL); + if (ret < 0) + goto out; + root1 = root->fs_info->chunk_root; + level = btrfs_header_level(root1->node); + ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid, + root1->node->start, 0, level, 0, + root1->nodesize, NULL); + if (ret < 0) + goto out; + btrfs_init_path(&path); + key.offset = 0; + key.objectid = 0; + key.type = BTRFS_ROOT_ITEM_KEY; + ret = btrfs_search_slot(NULL, root->fs_info->tree_root, + &key, &path, 0, 0); if (ret < 0) goto out; + while(1) { + leaf = path.nodes[0]; + slot = path.slots[0]; + if (slot >= btrfs_header_nritems(path.nodes[0])) { + ret = btrfs_next_leaf(root, &path); + if (ret != 0) + break; + leaf = path.nodes[0]; + slot = path.slots[0]; + } + btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]); + if (found_key.type == 
BTRFS_ROOT_ITEM_KEY) { + unsigned long offset; + u64 last_snapshot; - /* was this block corrupt? If so, don't add references to it */ - cache = lookup_cache_extent(info->corrupt_blocks, - rec->start, rec->max_size); - if (cache) { - ret = 0; + offset = btrfs_item_ptr_offset(leaf, path.slots[0]); + read_extent_buffer(leaf, &ri, offset, sizeof(ri)); + last_snapshot = btrfs_root_last_snapshot(&ri); + if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) { + level = btrfs_root_level(&ri); + level_size = root->nodesize; + ret = add_root_item_to_list(&normal_trees, + found_key.objectid, + btrfs_root_bytenr(&ri), + last_snapshot, level, + 0, level_size, NULL); + if (ret < 0) + goto out; + } else { + level = btrfs_root_level(&ri); + level_size = root->nodesize; + objectid = found_key.objectid; + btrfs_disk_key_to_cpu(&found_key, + &ri.drop_progress); + ret = add_root_item_to_list(&dropping_trees, + objectid, + btrfs_root_bytenr(&ri), + last_snapshot, level, + ri.drop_level, + level_size, &found_key); + if (ret < 0) + goto out; + } + } + path.slots[0]++; + } + btrfs_release_path(&path); + + /* + * check_block can return -EAGAIN if it fixes something, please keep + * this in mind when dealing with return values from these functions, if + * we get -EAGAIN we want to fall through and restart the loop. 
+ */ + ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending, + &seen, &reada, &nodes, &extent_cache, + &chunk_cache, &dev_cache, &block_group_cache, + &dev_extent_cache); + if (ret < 0) { + if (ret == -EAGAIN) + goto loop; + goto out; + } + ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr, + &pending, &seen, &reada, &nodes, + &extent_cache, &chunk_cache, &dev_cache, + &block_group_cache, &dev_extent_cache); + if (ret < 0) { + if (ret == -EAGAIN) + goto loop; goto out; } - /* step three, recreate all the refs we did find */ - while(cur != &rec->backrefs) { - back = list_entry(cur, struct extent_backref, list); - cur = cur->next; + ret = check_chunks(&chunk_cache, &block_group_cache, + &dev_extent_cache, NULL, NULL, NULL, 0); + if (ret) { + if (ret == -EAGAIN) + goto loop; + err = ret; + } - /* - * if we didn't find any references, don't create a - * new extent record - */ - if (!back->found_ref) - continue; + ret = check_extent_refs(root, &extent_cache); + if (ret < 0) { + if (ret == -EAGAIN) + goto loop; + goto out; + } - rec->bad_full_backref = 0; - ret = record_extent(trans, info, path, rec, back, allocated, flags); - allocated = 1; + ret = check_devices(&dev_cache, &dev_extent_cache); + if (ret && err) + ret = err; - if (ret) - goto out; - } out: - if (trans) { - int err = btrfs_commit_transaction(trans, info->extent_root); - if (!ret) - ret = err; + task_stop(ctx.info); + if (repair) { + free_corrupt_blocks_tree(root->fs_info->corrupt_blocks); + extent_io_tree_cleanup(&excluded_extents); + root->fs_info->fsck_extent_cache = NULL; + root->fs_info->free_extent_hook = NULL; + root->fs_info->corrupt_blocks = NULL; + root->fs_info->excluded_extents = NULL; } - - btrfs_free_path(path); + free(bits); + free_chunk_cache_tree(&chunk_cache); + free_device_cache_tree(&dev_cache); + free_block_group_tree(&block_group_cache); + free_device_extent_tree(&dev_extent_cache); + free_extent_cache_tree(&seen); + free_extent_cache_tree(&pending); + 
free_extent_cache_tree(&reada); + free_extent_cache_tree(&nodes); return ret; +loop: + free_corrupt_blocks_tree(root->fs_info->corrupt_blocks); + free_extent_cache_tree(&seen); + free_extent_cache_tree(&pending); + free_extent_cache_tree(&reada); + free_extent_cache_tree(&nodes); + free_chunk_cache_tree(&chunk_cache); + free_block_group_tree(&block_group_cache); + free_device_cache_tree(&dev_cache); + free_device_extent_tree(&dev_extent_cache); + free_extent_record_cache(root->fs_info, &extent_cache); + free_root_item_list(&normal_trees); + free_root_item_list(&dropping_trees); + extent_io_tree_cleanup(&excluded_extents); + goto again; } -static int fixup_extent_flags(struct btrfs_fs_info *fs_info, - struct extent_record *rec) +/* + * Check backrefs of a tree block given by @bytenr or @eb. + * + * @root: the root containing the @bytenr or @eb + * @eb: tree block extent buffer, can be NULL + * @bytenr: bytenr of the tree block to search + * @level: tree level of the tree block + * @owner: owner of the tree block + * + * Return >0 for any error found and output error message + * Return 0 for no error found + */ +static int check_tree_block_ref(struct btrfs_root *root, + struct extent_buffer *eb, u64 bytenr, + int level, u64 owner) { - struct btrfs_trans_handle *trans; - struct btrfs_root *root = fs_info->extent_root; - struct btrfs_path *path; - struct btrfs_extent_item *ei; struct btrfs_key key; - u64 flags; - int ret = 0; + struct btrfs_root *extent_root = root->fs_info->extent_root; + struct btrfs_path path; + struct btrfs_extent_item *ei; + struct btrfs_extent_inline_ref *iref; + struct extent_buffer *leaf; + unsigned long end; + unsigned long ptr; + int slot; + int skinny_level; + int type; + u32 nodesize = root->nodesize; + u32 item_size; + u64 offset; + int found_ref = 0; + int err = 0; + int ret; - key.objectid = rec->start; - if (rec->metadata) { + btrfs_init_path(&path); + key.objectid = bytenr; + if (btrfs_fs_incompat(root->fs_info, + 
BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)) key.type = BTRFS_METADATA_ITEM_KEY; - key.offset = rec->info_level; - } else { + else key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = rec->max_size; - } - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - trans = btrfs_start_transaction(root, 0); - if (IS_ERR(trans)) { - btrfs_free_path(path); - return PTR_ERR(trans); - } + key.offset = (u64)-1; - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + /* Search for the backref in extent tree */ + ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); if (ret < 0) { - btrfs_free_path(path); - btrfs_commit_transaction(trans, root); - return ret; - } else if (ret) { - fprintf(stderr, "Didn't find extent for %llu\n", - (unsigned long long)rec->start); - btrfs_free_path(path); - btrfs_commit_transaction(trans, root); - return -ENOENT; + err |= BACKREF_MISSING; + goto out; } - - ei = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_extent_item); - flags = btrfs_extent_flags(path->nodes[0], ei); - if (rec->flag_block_full_backref) { - fprintf(stderr, "setting full backref on %llu\n", - (unsigned long long)key.objectid); - flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; - } else { - fprintf(stderr, "clearing full backref on %llu\n", - (unsigned long long)key.objectid); - flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF; + ret = btrfs_previous_extent_item(extent_root, &path, bytenr); + if (ret) { + err |= BACKREF_MISSING; + goto out; } - btrfs_set_extent_flags(path->nodes[0], ei, flags); - btrfs_mark_buffer_dirty(path->nodes[0]); - btrfs_free_path(path); - return btrfs_commit_transaction(trans, root); -} -/* right now we only prune from the extent allocation tree */ -static int prune_one_block(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info, - struct btrfs_corrupt_block *corrupt) -{ - int ret; - struct btrfs_path path; - struct extent_buffer *eb; - u64 found; - int slot; - int nritems; - int level = corrupt->level + 1; + leaf = path.nodes[0]; + 
slot = path.slots[0]; + btrfs_item_key_to_cpu(leaf, &key, slot); - btrfs_init_path(&path); -again: - /* we want to stop at the parent to our busted block */ - path.lowest_level = level; + ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); - ret = btrfs_search_slot(trans, info->extent_root, - &corrupt->key, &path, -1, 1); + if (key.type == BTRFS_METADATA_ITEM_KEY) { + skinny_level = (int)key.offset; + iref = (struct btrfs_extent_inline_ref *)(ei + 1); + } else { + struct btrfs_tree_block_info *info; - if (ret < 0) - goto out; + info = (struct btrfs_tree_block_info *)(ei + 1); + skinny_level = btrfs_tree_block_level(leaf, info); + iref = (struct btrfs_extent_inline_ref *)(info + 1); + } - eb = path.nodes[level]; - if (!eb) { - ret = -ENOENT; - goto out; + if (eb) { + u64 header_gen; + u64 extent_gen; + + if (!(btrfs_extent_flags(leaf, ei) & + BTRFS_EXTENT_FLAG_TREE_BLOCK)) { + error( + "extent[%llu %u] backref type mismatch, missing bit: %llx", + key.objectid, nodesize, + BTRFS_EXTENT_FLAG_TREE_BLOCK); + err = BACKREF_MISMATCH; + } + header_gen = btrfs_header_generation(eb); + extent_gen = btrfs_extent_generation(leaf, ei); + if (header_gen != extent_gen) { + error( + "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu", + key.objectid, nodesize, header_gen, + extent_gen); + err = BACKREF_MISMATCH; + } + if (level != skinny_level) { + error( + "extent[%llu %u] level mismatch, wanted: %u, have: %u", + key.objectid, nodesize, level, skinny_level); + err = BACKREF_MISMATCH; + } + if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) { + error( + "extent[%llu %u] is referred by other roots than %llu", + key.objectid, nodesize, root->objectid); + err = BACKREF_MISMATCH; + } } /* - * hopefully the search gave us the block we want to prune, - * lets try that first + * Iterate the extent/metadata item to find the exact backref */ - slot = path.slots[level]; - found = btrfs_node_blockptr(eb, slot); - if (found == corrupt->cache.start) - goto 
del_ptr; + item_size = btrfs_item_size_nr(leaf, slot); + ptr = (unsigned long)iref; + end = (unsigned long)ei + item_size; + while (ptr < end) { + iref = (struct btrfs_extent_inline_ref *)ptr; + type = btrfs_extent_inline_ref_type(leaf, iref); + offset = btrfs_extent_inline_ref_offset(leaf, iref); - nritems = btrfs_header_nritems(eb); + if (type == BTRFS_TREE_BLOCK_REF_KEY && + (offset == root->objectid || offset == owner)) { + found_ref = 1; + } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) { + /* Check if the backref points to valid referencer */ + found_ref = !check_tree_block_ref(root, NULL, offset, + level + 1, owner); + } - /* the search failed, lets scan this node and hope we find it */ - for (slot = 0; slot < nritems; slot++) { - found = btrfs_node_blockptr(eb, slot); - if (found == corrupt->cache.start) - goto del_ptr; + if (found_ref) + break; + ptr += btrfs_extent_inline_ref_size(type); } + /* - * we couldn't find the bad block. TODO, search all the nodes for pointers - * to this block + * Inlined extent item doesn't have what we need, check + * TREE_BLOCK_REF_KEY */ - if (eb == info->extent_root->node) { - ret = -ENOENT; - goto out; - } else { - level++; + if (!found_ref) { btrfs_release_path(&path); - goto again; - } - -del_ptr: - printk("deleting pointer to block %Lu\n", corrupt->cache.start); - ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot); + key.objectid = bytenr; + key.type = BTRFS_TREE_BLOCK_REF_KEY; + key.offset = root->objectid; + ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); + if (!ret) + found_ref = 1; + } + if (!found_ref) + err |= BACKREF_MISSING; out: btrfs_release_path(&path); - return ret; + if (eb && (err & BACKREF_MISSING)) + error("extent[%llu %u] backref lost (owner: %llu, level: %u)", + bytenr, nodesize, owner, level); + return err; } -static int prune_corrupt_blocks(struct btrfs_fs_info *info) +/* + * Check EXTENT_DATA item, mainly for its dbackref in extent tree + * + * Return >0 any error found 
and output error message + * Return 0 for no error found + */ +static int check_extent_data_item(struct btrfs_root *root, + struct extent_buffer *eb, int slot) { - struct btrfs_trans_handle *trans = NULL; - struct cache_extent *cache; - struct btrfs_corrupt_block *corrupt; + struct btrfs_file_extent_item *fi; + struct btrfs_path path; + struct btrfs_root *extent_root = root->fs_info->extent_root; + struct btrfs_key fi_key; + struct btrfs_key dbref_key; + struct extent_buffer *leaf; + struct btrfs_extent_item *ei; + struct btrfs_extent_inline_ref *iref; + struct btrfs_extent_data_ref *dref; + u64 owner; + u64 file_extent_gen; + u64 disk_bytenr; + u64 disk_num_bytes; + u64 extent_num_bytes; + u64 extent_flags; + u64 extent_gen; + u32 item_size; + unsigned long end; + unsigned long ptr; + int type; + u64 ref_root; + int found_dbackref = 0; + int err = 0; + int ret; - while (1) { - cache = search_cache_extent(info->corrupt_blocks, 0); - if (!cache) - break; - if (!trans) { - trans = btrfs_start_transaction(info->extent_root, 1); - if (IS_ERR(trans)) - return PTR_ERR(trans); - } - corrupt = container_of(cache, struct btrfs_corrupt_block, cache); - prune_one_block(trans, info, corrupt); - remove_cache_extent(info->corrupt_blocks, cache); + btrfs_item_key_to_cpu(eb, &fi_key, slot); + fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); + file_extent_gen = btrfs_file_extent_generation(eb, fi); + + /* Nothing to check for hole and inline data extents */ + if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE || + btrfs_file_extent_disk_bytenr(eb, fi) == 0) + return 0; + + disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi); + disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); + extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi); + + /* Check unaligned disk_num_bytes and num_bytes */ + if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) { + error( +"file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u", + 
fi_key.objectid, fi_key.offset, disk_num_bytes, + root->sectorsize); + err |= BYTES_UNALIGNED; + } else { + data_bytes_allocated += disk_num_bytes; + } + if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) { + error( +"file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u", + fi_key.objectid, fi_key.offset, extent_num_bytes, + root->sectorsize); + err |= BYTES_UNALIGNED; + } else { + data_bytes_referenced += extent_num_bytes; } - if (trans) - return btrfs_commit_transaction(trans, info->extent_root); - return 0; -} + owner = btrfs_header_owner(eb); -static void reset_cached_block_groups(struct btrfs_fs_info *fs_info) -{ - struct btrfs_block_group_cache *cache; - u64 start, end; - int ret; + /* Check the extent item of the file extent in extent tree */ + btrfs_init_path(&path); + dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi); + dbref_key.type = BTRFS_EXTENT_ITEM_KEY; + dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi); - while (1) { - ret = find_first_extent_bit(&fs_info->free_space_cache, 0, - &start, &end, EXTENT_DIRTY); - if (ret) + ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0); + if (ret) { + err |= BACKREF_MISSING; + goto error; + } + + leaf = path.nodes[0]; + slot = path.slots[0]; + ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); + + extent_flags = btrfs_extent_flags(leaf, ei); + extent_gen = btrfs_extent_generation(leaf, ei); + + if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) { + error( + "extent[%llu %llu] backref type mismatch, wanted bit: %llx", + disk_bytenr, disk_num_bytes, + BTRFS_EXTENT_FLAG_DATA); + err |= BACKREF_MISMATCH; + } + + if (file_extent_gen < extent_gen) { + error( +"extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu", + disk_bytenr, disk_num_bytes, file_extent_gen, + extent_gen); + err |= BACKREF_MISMATCH; + } + + /* Check data backref inside that extent item */ + item_size = btrfs_item_size_nr(leaf, path.slots[0]); + iref = (struct 
btrfs_extent_inline_ref *)(ei + 1); + ptr = (unsigned long)iref; + end = (unsigned long)ei + item_size; + while (ptr < end) { + iref = (struct btrfs_extent_inline_ref *)ptr; + type = btrfs_extent_inline_ref_type(leaf, iref); + dref = (struct btrfs_extent_data_ref *)(&iref->offset); + + if (type == BTRFS_EXTENT_DATA_REF_KEY) { + ref_root = btrfs_extent_data_ref_root(leaf, dref); + if (ref_root == owner || ref_root == root->objectid) + found_dbackref = 1; + } else if (type == BTRFS_SHARED_DATA_REF_KEY) { + found_dbackref = !check_tree_block_ref(root, NULL, + btrfs_extent_inline_ref_offset(leaf, iref), + 0, owner); + } + + if (found_dbackref) break; - clear_extent_dirty(&fs_info->free_space_cache, start, end, - GFP_NOFS); + ptr += btrfs_extent_inline_ref_size(type); + } + + /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */ + if (!found_dbackref) { + btrfs_release_path(&path); + + btrfs_init_path(&path); + dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi); + dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY; + dbref_key.offset = hash_extent_data_ref(root->objectid, + fi_key.objectid, fi_key.offset); + + ret = btrfs_search_slot(NULL, root->fs_info->extent_root, + &dbref_key, &path, 0, 0); + if (!ret) + found_dbackref = 1; } - start = 0; - while (1) { - cache = btrfs_lookup_first_block_group(fs_info, start); - if (!cache) - break; - if (cache->cached) - cache->cached = 0; - start = cache->key.objectid + cache->key.offset; + if (!found_dbackref) + err |= BACKREF_MISSING; +error: + btrfs_release_path(&path); + if (err & BACKREF_MISSING) { + error("data extent[%llu %llu] backref lost", + disk_bytenr, disk_num_bytes); } + return err; } -static int check_extent_refs(struct btrfs_root *root, - struct cache_tree *extent_cache) +/* + * Get real tree block level for the case like shared block + * Return >= 0 as tree level + * Return <0 for error + */ +static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr) { - struct extent_record *rec; - 
struct cache_extent *cache; - int err = 0; - int ret = 0; - int fixed = 0; - int had_dups = 0; - int recorded = 0; + struct extent_buffer *eb; + struct btrfs_path path; + struct btrfs_key key; + struct btrfs_extent_item *ei; + u64 flags; + u64 transid; + u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); + u8 backref_level; + u8 header_level; + int ret; - if (repair) { - /* - * if we're doing a repair, we have to make sure - * we don't allocate from the problem extents. - * In the worst case, this will be all the - * extents in the FS - */ - cache = search_cache_extent(extent_cache, 0); - while(cache) { - rec = container_of(cache, struct extent_record, cache); - set_extent_dirty(root->fs_info->excluded_extents, - rec->start, - rec->start + rec->max_size - 1, - GFP_NOFS); - cache = next_cache_extent(cache); - } + /* Search extent tree for extent generation and level */ + key.objectid = bytenr; + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = (u64)-1; - /* pin down all the corrupted blocks too */ - cache = search_cache_extent(root->fs_info->corrupt_blocks, 0); - while(cache) { - set_extent_dirty(root->fs_info->excluded_extents, - cache->start, - cache->start + cache->size - 1, - GFP_NOFS); - cache = next_cache_extent(cache); - } - prune_corrupt_blocks(root->fs_info); - reset_cached_block_groups(root->fs_info); + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0); + if (ret < 0) + goto release_out; + ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr); + if (ret < 0) + goto release_out; + if (ret > 0) { + ret = -ENOENT; + goto release_out; } - reset_cached_block_groups(root->fs_info); + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + ei = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_extent_item); + flags = btrfs_extent_flags(path.nodes[0], ei); + if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) { + ret = -ENOENT; + goto release_out; + } - /* - * We need to delete any 
duplicate entries we find first otherwise we - * could mess up the extent tree when we have backrefs that actually - * belong to a different extent item and not the weird duplicate one. - */ - while (repair && !list_empty(&duplicate_extents)) { - rec = list_entry(duplicate_extents.next, struct extent_record, - list); - list_del_init(&rec->list); + /* Get transid for later read_tree_block() check */ + transid = btrfs_extent_generation(path.nodes[0], ei); - /* Sometimes we can find a backref before we find an actual - * extent, so we need to process it a little bit to see if there - * truly are multiple EXTENT_ITEM_KEY's for the same range, or - * if this is a backref screwup. If we need to delete stuff - * process_duplicates() will return 0, otherwise it will return - * 1 and we - */ - if (process_duplicates(root, extent_cache, rec)) - continue; - ret = delete_duplicate_records(root, rec); - if (ret < 0) - return ret; - /* - * delete_duplicate_records will return the number of entries - * deleted, so if it's greater than 0 then we know we actually - * did something and we need to remove. 
- */ - if (ret) - had_dups = 1; + /* Get backref level as one source */ + if (key.type == BTRFS_METADATA_ITEM_KEY) { + backref_level = key.offset; + } else { + struct btrfs_tree_block_info *info; + + info = (struct btrfs_tree_block_info *)(ei + 1); + backref_level = btrfs_tree_block_level(path.nodes[0], info); } + btrfs_release_path(&path); - if (had_dups) - return -EAGAIN; + /* Get level from tree block as an alternative source */ + eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid); + if (!extent_buffer_uptodate(eb)) { + free_extent_buffer(eb); + return -EIO; + } + header_level = btrfs_header_level(eb); + free_extent_buffer(eb); - while(1) { - int cur_err = 0; + if (header_level != backref_level) + return -EIO; + return header_level; - fixed = 0; - recorded = 0; - cache = search_cache_extent(extent_cache, 0); - if (!cache) - break; - rec = container_of(cache, struct extent_record, cache); - if (rec->num_duplicates) { - fprintf(stderr, "extent item %llu has multiple extent " - "items\n", (unsigned long long)rec->start); - err = 1; - cur_err = 1; - } +release_out: + btrfs_release_path(&path); + return ret; +} - if (rec->refs != rec->extent_item_refs) { - fprintf(stderr, "ref mismatch on [%llu %llu] ", - (unsigned long long)rec->start, - (unsigned long long)rec->nr); - fprintf(stderr, "extent item %llu, found %llu\n", - (unsigned long long)rec->extent_item_refs, - (unsigned long long)rec->refs); - ret = record_orphan_data_extents(root->fs_info, rec); - if (ret < 0) - goto repair_abort; - if (ret == 0) { - recorded = 1; - } else { - /* - * we can't use the extent to repair file - * extent, let the fallback method handle it. 
- */ - if (!fixed && repair) { - ret = fixup_extent_refs( - root->fs_info, - extent_cache, rec); - if (ret) - goto repair_abort; - fixed = 1; - } - } - err = 1; - cur_err = 1; - } - if (all_backpointers_checked(rec, 1)) { - fprintf(stderr, "backpointer mismatch on [%llu %llu]\n", - (unsigned long long)rec->start, - (unsigned long long)rec->nr); +/* + * Check if a tree block backref is valid (points to a valid tree block) + * if level == -1, level will be resolved + * Return >0 for any error found and print error message + */ +static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id, + u64 bytenr, int level) +{ + struct btrfs_root *root; + struct btrfs_key key; + struct btrfs_path path; + struct extent_buffer *eb; + struct extent_buffer *node; + u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); + int err = 0; + int ret; - if (!fixed && !recorded && repair) { - ret = fixup_extent_refs(root->fs_info, - extent_cache, rec); - if (ret) - goto repair_abort; - fixed = 1; - } - cur_err = 1; - err = 1; - } - if (!rec->owner_ref_checked) { - fprintf(stderr, "owner ref check failed [%llu %llu]\n", - (unsigned long long)rec->start, - (unsigned long long)rec->nr); - if (!fixed && !recorded && repair) { - ret = fixup_extent_refs(root->fs_info, - extent_cache, rec); - if (ret) - goto repair_abort; - fixed = 1; - } - err = 1; - cur_err = 1; - } - if (rec->bad_full_backref) { - fprintf(stderr, "bad full backref, on [%llu]\n", - (unsigned long long)rec->start); - if (repair) { - ret = fixup_extent_flags(root->fs_info, rec); - if (ret) - goto repair_abort; - fixed = 1; - } - err = 1; - cur_err = 1; - } - /* - * Although it's not a extent ref's problem, we reuse this - * routine for error reporting. - * No repair function yet. 
- */ - if (rec->crossing_stripes) { - fprintf(stderr, - "bad metadata [%llu, %llu) crossing stripe boundary\n", - rec->start, rec->start + rec->max_size); - err = 1; - cur_err = 1; - } + /* Query level for level == -1 special case */ + if (level == -1) + level = query_tree_block_level(fs_info, bytenr); + if (level < 0) { + err |= REFERENCER_MISSING; + goto out; + } - if (rec->wrong_chunk_type) { - fprintf(stderr, - "bad extent [%llu, %llu), type mismatch with chunk\n", - rec->start, rec->start + rec->max_size); - err = 1; - cur_err = 1; - } + key.objectid = root_id; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; - remove_cache_extent(extent_cache, cache); - free_all_extent_backrefs(rec); - if (!init_extent_tree && repair && (!cur_err || fixed)) - clear_extent_dirty(root->fs_info->excluded_extents, - rec->start, - rec->start + rec->max_size - 1, - GFP_NOFS); - free(rec); + root = btrfs_read_fs_root(fs_info, &key); + if (IS_ERR(root)) { + err |= REFERENCER_MISSING; + goto out; } -repair_abort: - if (repair) { - if (ret && ret != -EAGAIN) { - fprintf(stderr, "failed to repair damaged filesystem, aborting\n"); - exit(1); - } else if (!ret) { - struct btrfs_trans_handle *trans; - root = root->fs_info->extent_root; - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto repair_abort; - } + /* Read out the tree block to get item/node key */ + eb = read_tree_block(root, bytenr, root->nodesize, 0); + if (!extent_buffer_uptodate(eb)) { + err |= REFERENCER_MISSING; + free_extent_buffer(eb); + goto out; + } + + /* Empty tree, no need to check key */ + if (!btrfs_header_nritems(eb) && !level) { + free_extent_buffer(eb); + goto out; + } + + if (level) + btrfs_node_key_to_cpu(eb, &key, 0); + else + btrfs_item_key_to_cpu(eb, &key, 0); + + free_extent_buffer(eb); + + btrfs_init_path(&path); + path.lowest_level = level; + /* Search with the first key, to ensure we can reach it */ + ret = btrfs_search_slot(NULL, root, &key, &path, 
0, 0); + if (ret < 0) { + err |= REFERENCER_MISSING; + goto release_out; + } - btrfs_fix_block_accounting(trans, root); - ret = btrfs_commit_transaction(trans, root); - if (ret) - goto repair_abort; - } - if (err) - fprintf(stderr, "repaired damaged extent references\n"); - return ret; + node = path.nodes[level]; + if (btrfs_header_bytenr(node) != bytenr) { + error( + "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu", + bytenr, nodesize, bytenr, + btrfs_header_bytenr(node)); + err |= REFERENCER_MISMATCH; + } + if (btrfs_header_level(node) != level) { + error( + "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d", + bytenr, nodesize, level, + btrfs_header_level(node)); + err |= REFERENCER_MISMATCH; + } + +release_out: + btrfs_release_path(&path); +out: + if (err & REFERENCER_MISSING) { + if (level < 0) + error("extent [%llu %d] lost referencer (owner: %llu)", + bytenr, nodesize, root_id); + else + error( + "extent [%llu %d] lost referencer (owner: %llu, level: %u)", + bytenr, nodesize, root_id, level); } + return err; } -u64 calc_stripe_length(u64 type, u64 length, int num_stripes) +/* + * Check referencer for shared block backref + * If level == -1, this function will resolve the level. 
+ */ +static int check_shared_block_backref(struct btrfs_fs_info *fs_info, + u64 parent, u64 bytenr, int level) { - u64 stripe_size; + struct extent_buffer *eb; + u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); + u32 nr; + int found_parent = 0; + int i; - if (type & BTRFS_BLOCK_GROUP_RAID0) { - stripe_size = length; - stripe_size /= num_stripes; - } else if (type & BTRFS_BLOCK_GROUP_RAID10) { - stripe_size = length * 2; - stripe_size /= num_stripes; - } else if (type & BTRFS_BLOCK_GROUP_RAID5) { - stripe_size = length; - stripe_size /= (num_stripes - 1); - } else if (type & BTRFS_BLOCK_GROUP_RAID6) { - stripe_size = length; - stripe_size /= (num_stripes - 2); - } else { - stripe_size = length; + eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0); + if (!extent_buffer_uptodate(eb)) + goto out; + + if (level == -1) + level = query_tree_block_level(fs_info, bytenr); + if (level < 0) + goto out; + + if (level + 1 != btrfs_header_level(eb)) + goto out; + + nr = btrfs_header_nritems(eb); + for (i = 0; i < nr; i++) { + if (bytenr == btrfs_node_blockptr(eb, i)) { + found_parent = 1; + break; + } } - return stripe_size; +out: + free_extent_buffer(eb); + if (!found_parent) { + error( + "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)", + bytenr, nodesize, parent, level); + return REFERENCER_MISSING; + } + return 0; } /* - * Check the chunk with its block group/dev list ref: - * Return 0 if all refs seems valid. - * Return 1 if part of refs seems valid, need later check for rebuild ref - * like missing block group and needs to search extent tree to rebuild them. - * Return -1 if essential refs are missing and unable to rebuild. 
+ * Check referencer for normal (inlined) data ref + * If len == 0, it will be resolved by searching in extent tree */ -static int check_chunk_refs(struct chunk_record *chunk_rec, - struct block_group_tree *block_group_cache, - struct device_extent_tree *dev_extent_cache, - int silent) +static int check_extent_data_backref(struct btrfs_fs_info *fs_info, + u64 root_id, u64 objectid, u64 offset, + u64 bytenr, u64 len, u32 count) { - struct cache_extent *block_group_item; - struct block_group_record *block_group_rec; - struct cache_extent *dev_extent_item; - struct device_extent_record *dev_extent_rec; - u64 devid; - u64 offset; - u64 length; - int metadump_v2 = 0; - int i; + struct btrfs_root *root; + struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_key key; + struct btrfs_path path; + struct extent_buffer *leaf; + struct btrfs_file_extent_item *fi; + u32 found_count = 0; + int slot; int ret = 0; - block_group_item = lookup_cache_extent(&block_group_cache->tree, - chunk_rec->offset, - chunk_rec->length); - if (block_group_item) { - block_group_rec = container_of(block_group_item, - struct block_group_record, - cache); - if (chunk_rec->length != block_group_rec->offset || - chunk_rec->offset != block_group_rec->objectid || - (!metadump_v2 && - chunk_rec->type_flags != block_group_rec->flags)) { - if (!silent) - fprintf(stderr, - "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n", - chunk_rec->objectid, - chunk_rec->type, - chunk_rec->offset, - chunk_rec->length, - chunk_rec->offset, - chunk_rec->type_flags, - block_group_rec->objectid, - block_group_rec->type, - block_group_rec->offset, - block_group_rec->offset, - block_group_rec->objectid, - block_group_rec->flags); - ret = -1; - } else { - list_del_init(&block_group_rec->list); - chunk_rec->bg_rec = block_group_rec; - } - } else { - if (!silent) - fprintf(stderr, - "Chunk[%llu, %u, %llu]: 
length(%llu), offset(%llu), type(%llu) is not found in block group\n", - chunk_rec->objectid, - chunk_rec->type, - chunk_rec->offset, - chunk_rec->length, - chunk_rec->offset, - chunk_rec->type_flags); - ret = 1; + if (!len) { + key.objectid = bytenr; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = (u64)-1; + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); + if (ret < 0) + goto out; + ret = btrfs_previous_extent_item(extent_root, &path, bytenr); + if (ret) + goto out; + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + if (key.objectid != bytenr || + key.type != BTRFS_EXTENT_ITEM_KEY) + goto out; + len = key.offset; + btrfs_release_path(&path); } + key.objectid = root_id; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + btrfs_init_path(&path); - if (metadump_v2) - return ret; + root = btrfs_read_fs_root(fs_info, &key); + if (IS_ERR(root)) + goto out; - length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length, - chunk_rec->num_stripes); - for (i = 0; i < chunk_rec->num_stripes; ++i) { - devid = chunk_rec->stripes[i].devid; - offset = chunk_rec->stripes[i].offset; - dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree, - devid, offset, length); - if (dev_extent_item) { - dev_extent_rec = container_of(dev_extent_item, - struct device_extent_record, - cache); - if (dev_extent_rec->objectid != devid || - dev_extent_rec->offset != offset || - dev_extent_rec->chunk_offset != chunk_rec->offset || - dev_extent_rec->length != length) { - if (!silent) - fprintf(stderr, - "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n", - chunk_rec->objectid, - chunk_rec->type, - chunk_rec->offset, - chunk_rec->stripes[i].devid, - chunk_rec->stripes[i].offset, - dev_extent_rec->objectid, - dev_extent_rec->offset, - dev_extent_rec->length); - ret = -1; - } else { - list_move(&dev_extent_rec->chunk_list, - &chunk_rec->dextents); - } - } else { - if (!silent) - 
fprintf(stderr, - "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n", - chunk_rec->objectid, - chunk_rec->type, - chunk_rec->offset, - chunk_rec->stripes[i].devid, - chunk_rec->stripes[i].offset); - ret = -1; - } + key.objectid = objectid; + key.type = BTRFS_EXTENT_DATA_KEY; + /* + * It can be nasty as data backref offset is + * file offset - file extent offset, which is smaller or + * equal to original backref offset. The only special case is + * overflow. So we need to special check and do further search. + */ + key.offset = offset & (1ULL << 63) ? 0 : offset; + + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret < 0) + goto out; + + /* + * Search afterwards to get correct one + * NOTE: As we must do a comprehensive check on the data backref to + * make sure the dref count also matches, we must iterate all file + * extents for that inode. + */ + while (1) { + leaf = path.nodes[0]; + slot = path.slots[0]; + + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY) + break; + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); + /* + * Except normal disk bytenr and disk num bytes, we still + * need to do extra check on dbackref offset as + * dbackref offset = file_offset - file_extent_offset + */ + if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr && + btrfs_file_extent_disk_num_bytes(leaf, fi) == len && + (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) == + offset) + found_count++; + + ret = btrfs_next_item(root, &path); + if (ret) + break; } - return ret; +out: + btrfs_release_path(&path); + if (found_count != count) { + error( +"extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u", + bytenr, len, root_id, objectid, offset, count, found_count); + return REFERENCER_MISSING; + } + return 0; } -/* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */ -int check_chunks(struct cache_tree 
*chunk_cache, - struct block_group_tree *block_group_cache, - struct device_extent_tree *dev_extent_cache, - struct list_head *good, struct list_head *bad, - struct list_head *rebuild, int silent) +/* + * Check if the referencer of a shared data backref exists + */ +static int check_shared_data_backref(struct btrfs_fs_info *fs_info, + u64 parent, u64 bytenr) { - struct cache_extent *chunk_item; - struct chunk_record *chunk_rec; - struct block_group_record *bg_rec; - struct device_extent_record *dext_rec; - int err; - int ret = 0; + struct extent_buffer *eb; + struct btrfs_key key; + struct btrfs_file_extent_item *fi; + u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); + u32 nr; + int found_parent = 0; + int i; - chunk_item = first_cache_extent(chunk_cache); - while (chunk_item) { - chunk_rec = container_of(chunk_item, struct chunk_record, - cache); - err = check_chunk_refs(chunk_rec, block_group_cache, - dev_extent_cache, silent); - if (err < 0) - ret = err; - if (err == 0 && good) - list_add_tail(&chunk_rec->list, good); - if (err > 0 && rebuild) - list_add_tail(&chunk_rec->list, rebuild); - if (err < 0 && bad) - list_add_tail(&chunk_rec->list, bad); - chunk_item = next_cache_extent(chunk_item); + eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0); + if (!extent_buffer_uptodate(eb)) + goto out; + + nr = btrfs_header_nritems(eb); + for (i = 0; i < nr; i++) { + btrfs_item_key_to_cpu(eb, &key, i); + if (key.type != BTRFS_EXTENT_DATA_KEY) + continue; + + fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item); + if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE) + continue; + + if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) { + found_parent = 1; + break; + } + } + +out: + free_extent_buffer(eb); + if (!found_parent) { + error("shared extent %llu referencer lost (parent: %llu)", + bytenr, parent); + return REFERENCER_MISSING; } + return 0; +} + +/* + * This function will check a given extent item, including its backref and + * 
itself (like crossing stripe boundary and type) + * + * Since we don't use extent_record anymore, introduce new error bit + */ +static int check_extent_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot) +{ + struct btrfs_extent_item *ei; + struct btrfs_extent_inline_ref *iref; + struct btrfs_extent_data_ref *dref; + unsigned long end; + unsigned long ptr; + int type; + u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); + u32 item_size = btrfs_item_size_nr(eb, slot); + u64 flags; + u64 offset; + int metadata = 0; + int level; + struct btrfs_key key; + int ret; + int err = 0; - list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) { - if (!silent) - fprintf(stderr, - "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n", - bg_rec->objectid, - bg_rec->offset, - bg_rec->flags); - if (!ret) - ret = 1; + btrfs_item_key_to_cpu(eb, &key, slot); + if (key.type == BTRFS_EXTENT_ITEM_KEY) + bytes_used += key.offset; + else + bytes_used += nodesize; + + if (item_size < sizeof(*ei)) { + /* + * COMPAT_EXTENT_TREE_V0 case, but it's already a super + * old thing when on disk format is still un-determined. 
+ * No need to care about it anymore + */ + error("unsupported COMPAT_EXTENT_TREE_V0 detected"); + return -ENOTTY; } - list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans, - chunk_list) { - if (!silent) - fprintf(stderr, - "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n", - dext_rec->objectid, - dext_rec->offset, - dext_rec->length); - if (!ret) - ret = 1; + ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item); + flags = btrfs_extent_flags(eb, ei); + + if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) + metadata = 1; + if (metadata && check_crossing_stripes(global_info, key.objectid, + eb->len)) { + error("bad metadata [%llu, %llu) crossing stripe boundary", + key.objectid, key.objectid + nodesize); + err |= CROSSING_STRIPE_BOUNDARY; } - return ret; -} + ptr = (unsigned long)(ei + 1); -static int check_device_used(struct device_record *dev_rec, - struct device_extent_tree *dext_cache) -{ - struct cache_extent *cache; - struct device_extent_record *dev_extent_rec; - u64 total_byte = 0; + if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) { + /* Old EXTENT_ITEM metadata */ + struct btrfs_tree_block_info *info; - cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0); - while (cache) { - dev_extent_rec = container_of(cache, - struct device_extent_record, - cache); - if (dev_extent_rec->objectid != dev_rec->devid) - break; + info = (struct btrfs_tree_block_info *)ptr; + level = btrfs_tree_block_level(eb, info); + ptr += sizeof(struct btrfs_tree_block_info); + } else { + /* New METADATA_ITEM */ + level = key.offset; + } + end = (unsigned long)ei + item_size; - list_del_init(&dev_extent_rec->device_list); - total_byte += dev_extent_rec->length; - cache = next_cache_extent(cache); + if (ptr >= end) { + err |= ITEM_SIZE_MISMATCH; + goto out; } - if (total_byte != dev_rec->byte_used) { - fprintf(stderr, - "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n", - total_byte, dev_rec->byte_used, 
dev_rec->objectid, - dev_rec->type, dev_rec->offset); - return -1; - } else { - return 0; + /* Now check every backref in this extent item */ +next: + iref = (struct btrfs_extent_inline_ref *)ptr; + type = btrfs_extent_inline_ref_type(eb, iref); + offset = btrfs_extent_inline_ref_offset(eb, iref); + switch (type) { + case BTRFS_TREE_BLOCK_REF_KEY: + ret = check_tree_block_backref(fs_info, offset, key.objectid, + level); + err |= ret; + break; + case BTRFS_SHARED_BLOCK_REF_KEY: + ret = check_shared_block_backref(fs_info, offset, key.objectid, + level); + err |= ret; + break; + case BTRFS_EXTENT_DATA_REF_KEY: + dref = (struct btrfs_extent_data_ref *)(&iref->offset); + ret = check_extent_data_backref(fs_info, + btrfs_extent_data_ref_root(eb, dref), + btrfs_extent_data_ref_objectid(eb, dref), + btrfs_extent_data_ref_offset(eb, dref), + key.objectid, key.offset, + btrfs_extent_data_ref_count(eb, dref)); + err |= ret; + break; + case BTRFS_SHARED_DATA_REF_KEY: + ret = check_shared_data_backref(fs_info, offset, key.objectid); + err |= ret; + break; + default: + error("extent[%llu %d %llu] has unknown ref type: %d", + key.objectid, key.type, key.offset, type); + err |= UNKNOWN_TYPE; + goto out; } + + ptr += btrfs_extent_inline_ref_size(type); + if (ptr < end) + goto next; + +out: + return err; } -/* check btrfs_dev_item -> btrfs_dev_extent */ -static int check_devices(struct rb_root *dev_cache, - struct device_extent_tree *dev_extent_cache) +/* + * Check if a dev extent item is referred correctly by its chunk + */ +static int check_dev_extent_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot) { - struct rb_node *dev_node; - struct device_record *dev_rec; - struct device_extent_record *dext_rec; - int err; - int ret = 0; + struct btrfs_root *chunk_root = fs_info->chunk_root; + struct btrfs_dev_extent *ptr; + struct btrfs_path path; + struct btrfs_key chunk_key; + struct btrfs_key devext_key; + struct btrfs_chunk *chunk; + struct extent_buffer *l; + 
int num_stripes; + u64 length; + int i; + int found_chunk = 0; + int ret; - dev_node = rb_first(dev_cache); - while (dev_node) { - dev_rec = container_of(dev_node, struct device_record, node); - err = check_device_used(dev_rec, dev_extent_cache); - if (err) - ret = err; + btrfs_item_key_to_cpu(eb, &devext_key, slot); + ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent); + length = btrfs_dev_extent_length(eb, ptr); - dev_node = rb_next(dev_node); + chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr); + chunk_key.type = BTRFS_CHUNK_ITEM_KEY; + chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr); + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0); + if (ret) + goto out; + + l = path.nodes[0]; + chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk); + if (btrfs_chunk_length(l, chunk) != length) + goto out; + + num_stripes = btrfs_chunk_num_stripes(l, chunk); + for (i = 0; i < num_stripes; i++) { + u64 devid = btrfs_stripe_devid_nr(l, chunk, i); + u64 offset = btrfs_stripe_offset_nr(l, chunk, i); + + if (devid == devext_key.objectid && + offset == devext_key.offset) { + found_chunk = 1; + break; + } } - list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans, - device_list) { - fprintf(stderr, - "Device extent[%llu, %llu, %llu] didn't find its device.\n", - dext_rec->objectid, dext_rec->offset, dext_rec->length); - if (!ret) - ret = 1; +out: + btrfs_release_path(&path); + if (!found_chunk) { + error( + "device extent[%llu, %llu, %llu] did not find the related chunk", + devext_key.objectid, devext_key.offset, length); + return REFERENCER_MISSING; } - return ret; + return 0; } -static int add_root_item_to_list(struct list_head *head, - u64 objectid, u64 bytenr, u64 last_snapshot, - u8 level, u8 drop_level, - int level_size, struct btrfs_key *drop_key) +/* + * Check if the used space is correct with the dev item + */ +static int check_dev_item(struct btrfs_fs_info *fs_info, + struct 
extent_buffer *eb, int slot) { + struct btrfs_root *dev_root = fs_info->dev_root; + struct btrfs_dev_item *dev_item; + struct btrfs_path path; + struct btrfs_key key; + struct btrfs_dev_extent *ptr; + u64 dev_id; + u64 used; + u64 total = 0; + int ret; - struct root_item_record *ri_rec; - ri_rec = malloc(sizeof(*ri_rec)); - if (!ri_rec) - return -ENOMEM; - ri_rec->bytenr = bytenr; - ri_rec->objectid = objectid; - ri_rec->level = level; - ri_rec->level_size = level_size; - ri_rec->drop_level = drop_level; - ri_rec->last_snapshot = last_snapshot; - if (drop_key) - memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key)); - list_add_tail(&ri_rec->list, head); + dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item); + dev_id = btrfs_device_id(eb, dev_item); + used = btrfs_device_bytes_used(eb, dev_item); - return 0; -} + key.objectid = dev_id; + key.type = BTRFS_DEV_EXTENT_KEY; + key.offset = 0; -static void free_root_item_list(struct list_head *list) -{ - struct root_item_record *ri_rec; + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0); + if (ret < 0) { + btrfs_item_key_to_cpu(eb, &key, slot); + error("cannot find any related dev extent for dev[%llu, %u, %llu]", + key.objectid, key.type, key.offset); + btrfs_release_path(&path); + return REFERENCER_MISSING; + } - while (!list_empty(list)) { - ri_rec = list_first_entry(list, struct root_item_record, - list); - list_del_init(&ri_rec->list); - free(ri_rec); + /* Iterate dev_extents to calculate the used space of a device */ + while (1) { + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + + if (key.objectid > dev_id) + break; + if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id) + goto next; + + ptr = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_dev_extent); + total += btrfs_dev_extent_length(path.nodes[0], ptr); +next: + ret = btrfs_next_item(dev_root, &path); + if (ret) + break; + } + btrfs_release_path(&path); + + if (used != total) { + 
btrfs_item_key_to_cpu(eb, &key, slot); + error( +"Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]", + total, used, BTRFS_ROOT_TREE_OBJECTID, + BTRFS_DEV_EXTENT_KEY, dev_id); + return ACCOUNTING_MISMATCH; } + return 0; } -static int deal_root_from_list(struct list_head *list, - struct btrfs_root *root, - struct block_info *bits, - int bits_nr, - struct cache_tree *pending, - struct cache_tree *seen, - struct cache_tree *reada, - struct cache_tree *nodes, - struct cache_tree *extent_cache, - struct cache_tree *chunk_cache, - struct rb_root *dev_cache, - struct block_group_tree *block_group_cache, - struct device_extent_tree *dev_extent_cache) +/* + * Check a block group item with its referener (chunk) and its used space + * with extent/metadata item + */ +static int check_block_group_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot) { - int ret = 0; - u64 last; + struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_root *chunk_root = fs_info->chunk_root; + struct btrfs_block_group_item *bi; + struct btrfs_block_group_item bg_item; + struct btrfs_path path; + struct btrfs_key bg_key; + struct btrfs_key chunk_key; + struct btrfs_key extent_key; + struct btrfs_chunk *chunk; + struct extent_buffer *leaf; + struct btrfs_extent_item *ei; + u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); + u64 flags; + u64 bg_flags; + u64 used; + u64 total = 0; + int ret; + int err = 0; - while (!list_empty(list)) { - struct root_item_record *rec; - struct extent_buffer *buf; - rec = list_entry(list->next, - struct root_item_record, list); - last = 0; - buf = read_tree_block(root->fs_info->tree_root, - rec->bytenr, rec->level_size, 0); - if (!extent_buffer_uptodate(buf)) { - free_extent_buffer(buf); - ret = -EIO; - break; - } - add_root_to_pending(buf, extent_cache, pending, - seen, nodes, rec->objectid); - /* - * To rebuild extent tree, we need deal with snapshot - * one by one, otherwise we deal with 
node firstly which - * can maximize readahead. - */ - while (1) { - ret = run_next_block(root, bits, bits_nr, &last, - pending, seen, reada, nodes, - extent_cache, chunk_cache, - dev_cache, block_group_cache, - dev_extent_cache, rec); - if (ret != 0) - break; + btrfs_item_key_to_cpu(eb, &bg_key, slot); + bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item); + read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item)); + used = btrfs_block_group_used(&bg_item); + bg_flags = btrfs_block_group_flags(&bg_item); + + chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + chunk_key.type = BTRFS_CHUNK_ITEM_KEY; + chunk_key.offset = bg_key.objectid; + + btrfs_init_path(&path); + /* Search for the referencer chunk */ + ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0); + if (ret) { + error( + "block group[%llu %llu] did not find the related chunk item", + bg_key.objectid, bg_key.offset); + err |= REFERENCER_MISSING; + } else { + chunk = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_chunk); + if (btrfs_chunk_length(path.nodes[0], chunk) != + bg_key.offset) { + error( + "block group[%llu %llu] related chunk item length does not match", + bg_key.objectid, bg_key.offset); + err |= REFERENCER_MISMATCH; } - free_extent_buffer(buf); - list_del(&rec->list); - free(rec); - if (ret < 0) - break; } - while (ret >= 0) { - ret = run_next_block(root, bits, bits_nr, &last, pending, seen, - reada, nodes, extent_cache, chunk_cache, - dev_cache, block_group_cache, - dev_extent_cache, NULL); - if (ret != 0) { - if (ret > 0) - ret = 0; + btrfs_release_path(&path); + + /* Search from the block group bytenr */ + extent_key.objectid = bg_key.objectid; + extent_key.type = 0; + extent_key.offset = 0; + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0); + if (ret < 0) + goto out; + + /* Iterate extent tree to account used space */ + while (1) { + leaf = path.nodes[0]; + btrfs_item_key_to_cpu(leaf, 
&extent_key, path.slots[0]); + if (extent_key.objectid >= bg_key.objectid + bg_key.offset) break; + + if (extent_key.type != BTRFS_METADATA_ITEM_KEY && + extent_key.type != BTRFS_EXTENT_ITEM_KEY) + goto next; + if (extent_key.objectid < bg_key.objectid) + goto next; + + if (extent_key.type == BTRFS_METADATA_ITEM_KEY) + total += nodesize; + else + total += extent_key.offset; + + ei = btrfs_item_ptr(leaf, path.slots[0], + struct btrfs_extent_item); + flags = btrfs_extent_flags(leaf, ei); + if (flags & BTRFS_EXTENT_FLAG_DATA) { + if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) { + error( + "bad extent[%llu, %llu) type mismatch with chunk", + extent_key.objectid, + extent_key.objectid + extent_key.offset); + err |= CHUNK_TYPE_MISMATCH; + } + } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { + if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM | + BTRFS_BLOCK_GROUP_METADATA))) { + error( + "bad extent[%llu, %llu) type mismatch with chunk", + extent_key.objectid, + extent_key.objectid + nodesize); + err |= CHUNK_TYPE_MISMATCH; + } } +next: + ret = btrfs_next_item(extent_root, &path); + if (ret) + break; } - return ret; + +out: + btrfs_release_path(&path); + + if (total != used) { + error( + "block group[%llu %llu] used %llu but extent items used %llu", + bg_key.objectid, bg_key.offset, used, total); + err |= ACCOUNTING_MISMATCH; + } + return err; } -static int check_chunks_and_extents(struct btrfs_root *root) +/* + * Check a chunk item. 
+ * Including checking all referred dev_extents and block group + */ +static int check_chunk_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot) { - struct rb_root dev_cache; - struct cache_tree chunk_cache; - struct block_group_tree block_group_cache; - struct device_extent_tree dev_extent_cache; - struct cache_tree extent_cache; - struct cache_tree seen; - struct cache_tree pending; - struct cache_tree reada; - struct cache_tree nodes; - struct extent_io_tree excluded_extents; - struct cache_tree corrupt_blocks; + struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_root *dev_root = fs_info->dev_root; struct btrfs_path path; - struct btrfs_key key; - struct btrfs_key found_key; - int ret, err = 0; - struct block_info *bits; - int bits_nr; + struct btrfs_key chunk_key; + struct btrfs_key bg_key; + struct btrfs_key devext_key; + struct btrfs_chunk *chunk; struct extent_buffer *leaf; - int slot; - struct btrfs_root_item ri; - struct list_head dropping_trees; - struct list_head normal_trees; - struct btrfs_root *root1; + struct btrfs_block_group_item *bi; + struct btrfs_block_group_item bg_item; + struct btrfs_dev_extent *ptr; + u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy); + u64 length; + u64 chunk_end; + u64 type; + u64 profile; + int num_stripes; + u64 offset; u64 objectid; - u32 level_size; - u8 level; - - dev_cache = RB_ROOT; - cache_tree_init(&chunk_cache); - block_group_tree_init(&block_group_cache); - device_extent_tree_init(&dev_extent_cache); + int i; + int ret; + int err = 0; - cache_tree_init(&extent_cache); - cache_tree_init(&seen); - cache_tree_init(&pending); - cache_tree_init(&nodes); - cache_tree_init(&reada); - cache_tree_init(&corrupt_blocks); - extent_io_tree_init(&excluded_extents); - INIT_LIST_HEAD(&dropping_trees); - INIT_LIST_HEAD(&normal_trees); + btrfs_item_key_to_cpu(eb, &chunk_key, slot); + chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk); + length = btrfs_chunk_length(eb, chunk); 
+ chunk_end = chunk_key.offset + length; + if (!IS_ALIGNED(length, sectorsize)) { + error("chunk[%llu %llu) not aligned to %u", + chunk_key.offset, chunk_end, sectorsize); + err |= BYTES_UNALIGNED; + goto out; + } - if (repair) { - root->fs_info->excluded_extents = &excluded_extents; - root->fs_info->fsck_extent_cache = &extent_cache; - root->fs_info->free_extent_hook = free_extent_hook; - root->fs_info->corrupt_blocks = &corrupt_blocks; + type = btrfs_chunk_type(eb, chunk); + profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK; + if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { + error("chunk[%llu %llu) has no chunk type", + chunk_key.offset, chunk_end); + err |= UNKNOWN_TYPE; + } + if (profile && (profile & (profile - 1))) { + error("chunk[%llu %llu) multiple profiles detected: %llx", + chunk_key.offset, chunk_end, profile); + err |= UNKNOWN_TYPE; } - bits_nr = 1024; - bits = malloc(bits_nr * sizeof(struct block_info)); - if (!bits) { - perror("malloc"); - exit(1); + bg_key.objectid = chunk_key.offset; + bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; + bg_key.offset = length; + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0); + if (ret) { + error( + "chunk[%llu %llu) did not find the related block group item", + chunk_key.offset, chunk_end); + err |= REFERENCER_MISSING; + } else{ + leaf = path.nodes[0]; + bi = btrfs_item_ptr(leaf, path.slots[0], + struct btrfs_block_group_item); + read_extent_buffer(leaf, &bg_item, (unsigned long)bi, + sizeof(bg_item)); + if (btrfs_block_group_flags(&bg_item) != type) { + error( +"chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu", + chunk_key.offset, chunk_end, type, + btrfs_block_group_flags(&bg_item)); + err |= REFERENCER_MISSING; + } } - if (ctx.progress_enabled) { - ctx.tp = TASK_EXTENTS; - task_start(ctx.info); + num_stripes = btrfs_chunk_num_stripes(eb, chunk); + for (i = 0; i < num_stripes; i++) { + btrfs_release_path(&path); + btrfs_init_path(&path); + 
devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i); + devext_key.type = BTRFS_DEV_EXTENT_KEY; + devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i); + + ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path, + 0, 0); + if (ret) + goto not_match_dev; + + leaf = path.nodes[0]; + ptr = btrfs_item_ptr(leaf, path.slots[0], + struct btrfs_dev_extent); + objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr); + offset = btrfs_dev_extent_chunk_offset(leaf, ptr); + if (objectid != chunk_key.objectid || + offset != chunk_key.offset || + btrfs_dev_extent_length(leaf, ptr) != length) + goto not_match_dev; + continue; +not_match_dev: + err |= BACKREF_MISSING; + error( + "chunk[%llu %llu) stripe %d did not find the related dev extent", + chunk_key.objectid, chunk_end, i); + continue; } + btrfs_release_path(&path); +out: + return err; +} + +/* + * Main entry function to check known items and update related accounting info + */ +static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_key key; + int slot = 0; + int type; + struct btrfs_extent_data_ref *dref; + int ret; + int err = 0; + +next: + btrfs_item_key_to_cpu(eb, &key, slot); + type = key.type; + + switch (type) { + case BTRFS_EXTENT_DATA_KEY: + ret = check_extent_data_item(root, eb, slot); + err |= ret; + break; + case BTRFS_BLOCK_GROUP_ITEM_KEY: + ret = check_block_group_item(fs_info, eb, slot); + err |= ret; + break; + case BTRFS_DEV_ITEM_KEY: + ret = check_dev_item(fs_info, eb, slot); + err |= ret; + break; + case BTRFS_CHUNK_ITEM_KEY: + ret = check_chunk_item(fs_info, eb, slot); + err |= ret; + break; + case BTRFS_DEV_EXTENT_KEY: + ret = check_dev_extent_item(fs_info, eb, slot); + err |= ret; + break; + case BTRFS_EXTENT_ITEM_KEY: + case BTRFS_METADATA_ITEM_KEY: + ret = check_extent_item(fs_info, eb, slot); + err |= ret; + break; + case BTRFS_EXTENT_CSUM_KEY: + total_csum_bytes += btrfs_item_size_nr(eb, slot); + 
break; + case BTRFS_TREE_BLOCK_REF_KEY: + ret = check_tree_block_backref(fs_info, key.offset, + key.objectid, -1); + err |= ret; + break; + case BTRFS_EXTENT_DATA_REF_KEY: + dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref); + ret = check_extent_data_backref(fs_info, + btrfs_extent_data_ref_root(eb, dref), + btrfs_extent_data_ref_objectid(eb, dref), + btrfs_extent_data_ref_offset(eb, dref), + key.objectid, 0, + btrfs_extent_data_ref_count(eb, dref)); + err |= ret; + break; + case BTRFS_SHARED_BLOCK_REF_KEY: + ret = check_shared_block_backref(fs_info, key.offset, + key.objectid, -1); + err |= ret; + break; + case BTRFS_SHARED_DATA_REF_KEY: + ret = check_shared_data_backref(fs_info, key.offset, + key.objectid); + err |= ret; + break; + default: + break; + } + + if (++slot < btrfs_header_nritems(eb)) + goto next; + + return err; +} + +/* + * Helper function for later fs/subvol tree check. To determine if a tree + * block should be checked. + * This function will ensure only the direct referencer with lowest rootid to + * check a fs/subvolume tree block. + * + * Backref check at extent tree would detect errors like missing subvolume + * tree, so we can do aggressive check to reduce duplicated checks. 
+ */ +static int should_check(struct btrfs_root *root, struct extent_buffer *eb) +{ + struct btrfs_root *extent_root = root->fs_info->extent_root; + struct btrfs_key key; + struct btrfs_path path; + struct extent_buffer *leaf; + int slot; + struct btrfs_extent_item *ei; + unsigned long ptr; + unsigned long end; + int type; + u32 item_size; + u64 offset; + struct btrfs_extent_inline_ref *iref; + int ret; -again: - root1 = root->fs_info->tree_root; - level = btrfs_header_level(root1->node); - ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid, - root1->node->start, 0, level, 0, - root1->nodesize, NULL); - if (ret < 0) - goto out; - root1 = root->fs_info->chunk_root; - level = btrfs_header_level(root1->node); - ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid, - root1->node->start, 0, level, 0, - root1->nodesize, NULL); - if (ret < 0) - goto out; btrfs_init_path(&path); - key.offset = 0; - key.objectid = 0; - btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); - ret = btrfs_search_slot(NULL, root->fs_info->tree_root, - &key, &path, 0, 0); + key.objectid = btrfs_header_bytenr(eb); + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = (u64)-1; + + /* + * Any failure in backref resolving means we can't determine + * whom the tree block belongs to. 
+ * So in that case, we need to check that tree block + */ + ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); if (ret < 0) - goto out; - while(1) { - leaf = path.nodes[0]; - slot = path.slots[0]; - if (slot >= btrfs_header_nritems(path.nodes[0])) { - ret = btrfs_next_leaf(root, &path); - if (ret != 0) - break; - leaf = path.nodes[0]; - slot = path.slots[0]; - } - btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]); - if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) { - unsigned long offset; - u64 last_snapshot; + goto need_check; - offset = btrfs_item_ptr_offset(leaf, path.slots[0]); - read_extent_buffer(leaf, &ri, offset, sizeof(ri)); - last_snapshot = btrfs_root_last_snapshot(&ri); - if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) { - level = btrfs_root_level(&ri); - level_size = root->nodesize; - ret = add_root_item_to_list(&normal_trees, - found_key.objectid, - btrfs_root_bytenr(&ri), - last_snapshot, level, - 0, level_size, NULL); - if (ret < 0) - goto out; - } else { - level = btrfs_root_level(&ri); - level_size = root->nodesize; - objectid = found_key.objectid; - btrfs_disk_key_to_cpu(&found_key, - &ri.drop_progress); - ret = add_root_item_to_list(&dropping_trees, - objectid, - btrfs_root_bytenr(&ri), - last_snapshot, level, - ri.drop_level, - level_size, &found_key); - if (ret < 0) - goto out; - } + ret = btrfs_previous_extent_item(extent_root, &path, + btrfs_header_bytenr(eb)); + if (ret) + goto need_check; + + leaf = path.nodes[0]; + slot = path.slots[0]; + btrfs_item_key_to_cpu(leaf, &key, slot); + ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); + + if (key.type == BTRFS_METADATA_ITEM_KEY) { + iref = (struct btrfs_extent_inline_ref *)(ei + 1); + } else { + struct btrfs_tree_block_info *info; + + info = (struct btrfs_tree_block_info *)(ei + 1); + iref = (struct btrfs_extent_inline_ref *)(info + 1); + } + + item_size = btrfs_item_size_nr(leaf, slot); + ptr = (unsigned long)iref; + end = (unsigned long)ei + item_size; 
+ while (ptr < end) { + iref = (struct btrfs_extent_inline_ref *)ptr; + type = btrfs_extent_inline_ref_type(leaf, iref); + offset = btrfs_extent_inline_ref_offset(leaf, iref); + + /* + * We only check the tree block if current root is + * the lowest referencer of it. + */ + if (type == BTRFS_TREE_BLOCK_REF_KEY && + offset < root->objectid) { + btrfs_release_path(&path); + return 0; } - path.slots[0]++; + + ptr += btrfs_extent_inline_ref_size(type); } + /* + * Normally we should also check keyed tree block ref, but that may be + * very time consuming. Inlined ref should already make us skip a lot + * of refs now. So skip search keyed tree block ref. + */ + +need_check: btrfs_release_path(&path); + return 1; +} + +/* + * Traversal function for tree block. We will do: + * 1) Skip shared fs/subvolume tree blocks + * 2) Update related bytes accounting + * 3) Pre-order traversal + */ +static int traverse_tree_block(struct btrfs_root *root, + struct extent_buffer *node) +{ + struct extent_buffer *eb; + struct btrfs_key key; + struct btrfs_key drop_key; + int level; + u64 nr; + int i; + int err = 0; + int ret; /* - * check_block can return -EAGAIN if it fixes something, please keep - * this in mind when dealing with return values from these functions, if - * we get -EAGAIN we want to fall through and restart the loop. 
+ * Skip shared fs/subvolume tree block, in that case they will + * be checked by referencer with lowest rootid */ - ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending, - &seen, &reada, &nodes, &extent_cache, - &chunk_cache, &dev_cache, &block_group_cache, - &dev_extent_cache); - if (ret < 0) { - if (ret == -EAGAIN) - goto loop; - goto out; - } - ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr, - &pending, &seen, &reada, &nodes, - &extent_cache, &chunk_cache, &dev_cache, - &block_group_cache, &dev_extent_cache); - if (ret < 0) { - if (ret == -EAGAIN) - goto loop; - goto out; + if (is_fstree(root->objectid) && !should_check(root, node)) + return 0; + + /* Update bytes accounting */ + total_btree_bytes += node->len; + if (fs_root_objectid(btrfs_header_owner(node))) + total_fs_tree_bytes += node->len; + if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID) + total_extent_tree_bytes += node->len; + if (!found_old_backref && + btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID && + btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV && + !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC)) + found_old_backref = 1; + + /* pre-order traversal, check itself first */ + level = btrfs_header_level(node); + ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node), + btrfs_header_level(node), + btrfs_header_owner(node)); + err |= ret; + if (err) + error( + "check %s failed root %llu bytenr %llu level %d, force continue check", + level ? 
"node":"leaf", root->objectid, + btrfs_header_bytenr(node), btrfs_header_level(node)); + + if (!level) { + btree_space_waste += btrfs_leaf_free_space(root, node); + ret = check_leaf_items(root, node); + err |= ret; + return err; } - ret = check_chunks(&chunk_cache, &block_group_cache, - &dev_extent_cache, NULL, NULL, NULL, 0); - if (ret) { - if (ret == -EAGAIN) - goto loop; - err = ret; + nr = btrfs_header_nritems(node); + btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress); + btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) * + sizeof(struct btrfs_key_ptr); + + /* Then check all its children */ + for (i = 0; i < nr; i++) { + u64 blocknr = btrfs_node_blockptr(node, i); + + btrfs_node_key_to_cpu(node, &key, i); + if (level == root->root_item.drop_level && + is_dropped_key(&key, &drop_key)) + continue; + + /* + * As a btrfs tree has most 8 levels (0..7), so it's quite safe + * to call the function itself. + */ + eb = read_tree_block(root, blocknr, root->nodesize, 0); + if (extent_buffer_uptodate(eb)) { + ret = traverse_tree_block(root, eb); + err |= ret; + } + free_extent_buffer(eb); } - ret = check_extent_refs(root, &extent_cache); - if (ret < 0) { - if (ret == -EAGAIN) - goto loop; + return err; +} + +/* + * Low memory usage version check_chunks_and_extents. 
+ */ +static int check_chunks_and_extents_v2(struct btrfs_root *root) +{ + struct btrfs_path path; + struct btrfs_key key; + struct btrfs_root *root1; + struct btrfs_root *cur_root; + int err = 0; + int ret; + + root1 = root->fs_info->chunk_root; + ret = traverse_tree_block(root1, root1->node); + err |= ret; + + root1 = root->fs_info->tree_root; + ret = traverse_tree_block(root1, root1->node); + err |= ret; + + btrfs_init_path(&path); + key.objectid = BTRFS_EXTENT_TREE_OBJECTID; + key.offset = 0; + key.type = BTRFS_ROOT_ITEM_KEY; + + ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0); + if (ret) { + error("cannot find extent treet in tree_root"); goto out; } - ret = check_devices(&dev_cache, &dev_extent_cache); - if (ret && err) - ret = err; + while (1) { + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + if (key.type != BTRFS_ROOT_ITEM_KEY) + goto next; + key.offset = (u64)-1; -out: - task_stop(ctx.info); - if (repair) { - free_corrupt_blocks_tree(root->fs_info->corrupt_blocks); - extent_io_tree_cleanup(&excluded_extents); - root->fs_info->fsck_extent_cache = NULL; - root->fs_info->free_extent_hook = NULL; - root->fs_info->corrupt_blocks = NULL; - root->fs_info->excluded_extents = NULL; + cur_root = btrfs_read_fs_root(root->fs_info, &key); + if (IS_ERR(cur_root) || !cur_root) { + error("failed to read tree: %lld", key.objectid); + goto next; + } + + ret = traverse_tree_block(cur_root, cur_root->node); + err |= ret; + +next: + ret = btrfs_next_item(root1, &path); + if (ret) + goto out; } - free(bits); - free_chunk_cache_tree(&chunk_cache); - free_device_cache_tree(&dev_cache); - free_block_group_tree(&block_group_cache); - free_device_extent_tree(&dev_extent_cache); - free_extent_cache_tree(&seen); - free_extent_cache_tree(&pending); - free_extent_cache_tree(&reada); - free_extent_cache_tree(&nodes); - return ret; -loop: - free_corrupt_blocks_tree(root->fs_info->corrupt_blocks); - free_extent_cache_tree(&seen); - free_extent_cache_tree(&pending); - 
free_extent_cache_tree(&reada); - free_extent_cache_tree(&nodes); - free_chunk_cache_tree(&chunk_cache); - free_block_group_tree(&block_group_cache); - free_device_cache_tree(&dev_cache); - free_device_extent_tree(&dev_extent_cache); - free_extent_record_cache(root->fs_info, &extent_cache); - free_root_item_list(&normal_trees); - free_root_item_list(&dropping_trees); - extent_io_tree_cleanup(&excluded_extents); - goto again; + +out: + btrfs_release_path(&path); + return err; } static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans, @@ -8536,24 +10784,20 @@ static int pin_metadata_blocks(struct btrfs_fs_info *fs_info) static int reset_block_groups(struct btrfs_fs_info *fs_info) { struct btrfs_block_group_cache *cache; - struct btrfs_path *path; + struct btrfs_path path; struct extent_buffer *leaf; struct btrfs_chunk *chunk; struct btrfs_key key; int ret; u64 start; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - + btrfs_init_path(&path); key.objectid = 0; key.type = BTRFS_CHUNK_ITEM_KEY; key.offset = 0; - - ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0); if (ret < 0) { - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } @@ -8568,10 +10812,10 @@ static int reset_block_groups(struct btrfs_fs_info *fs_info) /* First we need to create the in-memory block groups */ while (1) { - if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { - ret = btrfs_next_leaf(fs_info->chunk_root, path); + if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) { + ret = btrfs_next_leaf(fs_info->chunk_root, &path); if (ret < 0) { - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } if (ret) { @@ -8579,15 +10823,14 @@ static int reset_block_groups(struct btrfs_fs_info *fs_info) break; } } - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + leaf = path.nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, 
path.slots[0]); if (key.type != BTRFS_CHUNK_ITEM_KEY) { - path->slots[0]++; + path.slots[0]++; continue; } - chunk = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_chunk); + chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk); btrfs_add_block_group(fs_info, 0, btrfs_chunk_type(leaf, chunk), key.objectid, key.offset, @@ -8595,7 +10838,7 @@ static int reset_block_groups(struct btrfs_fs_info *fs_info) set_extent_dirty(&fs_info->free_space_cache, key.offset, key.offset + btrfs_chunk_length(leaf, chunk), GFP_NOFS); - path->slots[0]++; + path.slots[0]++; } start = 0; while (1) { @@ -8606,7 +10849,7 @@ static int reset_block_groups(struct btrfs_fs_info *fs_info) start = cache->key.objectid + cache->key.offset; } - btrfs_free_path(path); + btrfs_release_path(&path); return 0; } @@ -8614,22 +10857,18 @@ static int reset_balance(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info) { struct btrfs_root *root = fs_info->tree_root; - struct btrfs_path *path; + struct btrfs_path path; struct extent_buffer *leaf; struct btrfs_key key; int del_slot, del_nr = 0; int ret; int found = 0; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - + btrfs_init_path(&path); key.objectid = BTRFS_BALANCE_OBJECTID; key.type = BTRFS_BALANCE_ITEM_KEY; key.offset = 0; - - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); if (ret) { if (ret > 0) ret = 0; @@ -8639,64 +10878,63 @@ static int reset_balance(struct btrfs_trans_handle *trans, goto out; } - ret = btrfs_del_item(trans, root, path); + ret = btrfs_del_item(trans, root, &path); if (ret) goto out; - btrfs_release_path(path); + btrfs_release_path(&path); key.objectid = BTRFS_TREE_RELOC_OBJECTID; key.type = BTRFS_ROOT_ITEM_KEY; key.offset = 0; - - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); if (ret < 0) goto out; while (1) { - if (path->slots[0] >= 
btrfs_header_nritems(path->nodes[0])) { + if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) { if (!found) break; if (del_nr) { - ret = btrfs_del_items(trans, root, path, + ret = btrfs_del_items(trans, root, &path, del_slot, del_nr); del_nr = 0; if (ret) goto out; } key.offset++; - btrfs_release_path(path); + btrfs_release_path(&path); found = 0; - ret = btrfs_search_slot(trans, root, &key, path, + ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); if (ret < 0) goto out; continue; } found = 1; - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + leaf = path.nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); if (key.objectid > BTRFS_TREE_RELOC_OBJECTID) break; if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) { - path->slots[0]++; + path.slots[0]++; continue; } if (!del_nr) { - del_slot = path->slots[0]; + del_slot = path.slots[0]; del_nr = 1; } else { del_nr++; } - path->slots[0]++; + path.slots[0]++; } if (del_nr) { - ret = btrfs_del_items(trans, root, path, del_slot, del_nr); + ret = btrfs_del_items(trans, root, &path, del_slot, del_nr); if (ret) goto out; } - btrfs_release_path(path); + btrfs_release_path(&path); reinit_data_reloc: key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; @@ -8714,7 +10952,7 @@ reinit_data_reloc: goto out; ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID); out: - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } @@ -8797,14 +11035,14 @@ static int reinit_extent_tree(struct btrfs_trans_handle *trans, ret = reset_balance(trans, fs_info); if (ret) - fprintf(stderr, "error reseting the pending balance\n"); + fprintf(stderr, "error resetting the pending balance\n"); return ret; } static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb) { - struct btrfs_path *path; + struct btrfs_path path; struct btrfs_trans_handle *trans; struct btrfs_key key; int ret; @@ -8821,31 +11059,26 @@ static int recow_extent_buffer(struct btrfs_root *root, struct 
extent_buffer *eb return PTR_ERR(root); } - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - btrfs_free_path(path); + if (IS_ERR(trans)) return PTR_ERR(trans); - } - path->lowest_level = btrfs_header_level(eb); - if (path->lowest_level) + btrfs_init_path(&path); + path.lowest_level = btrfs_header_level(eb); + if (path.lowest_level) btrfs_node_key_to_cpu(eb, &key, 0); else btrfs_item_key_to_cpu(eb, &key, 0); - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + ret = btrfs_search_slot(trans, root, &key, &path, 0, 1); btrfs_commit_transaction(trans, root); - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad) { - struct btrfs_path *path; + struct btrfs_path path; struct btrfs_trans_handle *trans; struct btrfs_key key; int ret; @@ -8863,26 +11096,21 @@ static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad) return PTR_ERR(root); } - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - btrfs_free_path(path); + if (IS_ERR(trans)) return PTR_ERR(trans); - } - ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1); + btrfs_init_path(&path); + ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1); if (ret) { if (ret > 0) ret = 0; goto out; } - ret = btrfs_del_item(trans, root, path); + ret = btrfs_del_item(trans, root, &path); out: btrfs_commit_transaction(trans, root); - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } @@ -8929,7 +11157,7 @@ static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans, struct btrfs_root *csum_root, struct btrfs_root *cur_root) { - struct btrfs_path *path; + struct btrfs_path path; struct btrfs_key key; struct extent_buffer *node; struct btrfs_file_extent_item *fi; @@ -8939,30 +11167,25 @@ static int 
fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans, int slot = 0; int ret = 0; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; buf = malloc(cur_root->fs_info->csum_root->sectorsize); - if (!buf) { - ret = -ENOMEM; - goto out; - } + if (!buf) + return -ENOMEM; + btrfs_init_path(&path); key.objectid = 0; key.offset = 0; key.type = 0; - - ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0); if (ret < 0) goto out; /* Iterate all regular file extents and fill its csum */ while (1) { - btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); if (key.type != BTRFS_EXTENT_DATA_KEY) goto next; - node = path->nodes[0]; - slot = path->slots[0]; + node = path.nodes[0]; + slot = path.slots[0]; fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item); if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG) goto next; @@ -8979,7 +11202,7 @@ next: * TODO: if next leaf is corrupted, jump to nearest next valid * leaf. 
*/ - ret = btrfs_next_item(cur_root, path); + ret = btrfs_next_item(cur_root, &path); if (ret < 0) goto out; if (ret > 0) { @@ -8989,7 +11212,7 @@ next: } out: - btrfs_free_path(path); + btrfs_release_path(&path); free(buf); return ret; } @@ -8998,7 +11221,7 @@ static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans, struct btrfs_root *csum_root) { struct btrfs_fs_info *fs_info = csum_root->fs_info; - struct btrfs_path *path; + struct btrfs_path path; struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *cur_root; struct extent_buffer *node; @@ -9006,15 +11229,11 @@ static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans, int slot = 0; int ret = 0; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - + btrfs_init_path(&path); key.objectid = BTRFS_FS_TREE_OBJECTID; key.offset = 0; key.type = BTRFS_ROOT_ITEM_KEY; - - ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0); if (ret < 0) goto out; if (ret > 0) { @@ -9023,8 +11242,8 @@ static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans, } while (1) { - node = path->nodes[0]; - slot = path->slots[0]; + node = path.nodes[0]; + slot = path.slots[0]; btrfs_item_key_to_cpu(node, &key, slot); if (key.objectid > BTRFS_LAST_FREE_OBJECTID) goto out; @@ -9045,7 +11264,7 @@ static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans, if (ret < 0) goto out; next: - ret = btrfs_next_item(tree_root, path); + ret = btrfs_next_item(tree_root, &path); if (ret > 0) { ret = 0; goto out; @@ -9055,7 +11274,7 @@ next: } out: - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } @@ -9063,36 +11282,32 @@ static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans, struct btrfs_root *csum_root) { struct btrfs_root *extent_root = csum_root->fs_info->extent_root; - struct btrfs_path *path; + struct btrfs_path path; struct btrfs_extent_item *ei; struct extent_buffer *leaf; char *buf; 
struct btrfs_key key; int ret; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - + btrfs_init_path(&path); key.objectid = 0; key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = 0; - - ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); if (ret < 0) { - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } buf = malloc(csum_root->sectorsize); if (!buf) { - btrfs_free_path(path); + btrfs_release_path(&path); return -ENOMEM; } while (1) { - if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { - ret = btrfs_next_leaf(extent_root, path); + if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) { + ret = btrfs_next_leaf(extent_root, &path); if (ret < 0) break; if (ret) { @@ -9100,19 +11315,19 @@ static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans, break; } } - leaf = path->nodes[0]; + leaf = path.nodes[0]; - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); if (key.type != BTRFS_EXTENT_ITEM_KEY) { - path->slots[0]++; + path.slots[0]++; continue; } - ei = btrfs_item_ptr(leaf, path->slots[0], + ei = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_extent_item); if (!(btrfs_extent_flags(leaf, ei) & BTRFS_EXTENT_FLAG_DATA)) { - path->slots[0]++; + path.slots[0]++; continue; } @@ -9120,10 +11335,10 @@ static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans, key.offset); if (ret) break; - path->slots[0]++; + path.slots[0]++; } - btrfs_free_path(path); + btrfs_release_path(&path); free(buf); return ret; } @@ -9171,7 +11386,7 @@ static int build_roots_info_cache(struct btrfs_fs_info *info) int ret = 0; struct btrfs_key key; struct extent_buffer *leaf; - struct btrfs_path *path; + struct btrfs_path path; if (!roots_info_cache) { roots_info_cache = malloc(sizeof(*roots_info_cache)); @@ -9180,24 +11395,20 @@ static int build_roots_info_cache(struct btrfs_fs_info *info) 
cache_tree_init(roots_info_cache); } - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - + btrfs_init_path(&path); key.objectid = 0; key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = 0; - - ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0); if (ret < 0) goto out; - leaf = path->nodes[0]; + leaf = path.nodes[0]; while (1) { struct btrfs_key found_key; struct btrfs_extent_item *ei; struct btrfs_extent_inline_ref *iref; - int slot = path->slots[0]; + int slot = path.slots[0]; int type; u64 flags; u64 root_id; @@ -9206,18 +11417,18 @@ static int build_roots_info_cache(struct btrfs_fs_info *info) struct root_item_info *rii; if (slot >= btrfs_header_nritems(leaf)) { - ret = btrfs_next_leaf(info->extent_root, path); + ret = btrfs_next_leaf(info->extent_root, &path); if (ret < 0) { break; } else if (ret) { ret = 0; break; } - leaf = path->nodes[0]; - slot = path->slots[0]; + leaf = path.nodes[0]; + slot = path.slots[0]; } - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]); if (found_key.type != BTRFS_EXTENT_ITEM_KEY && found_key.type != BTRFS_METADATA_ITEM_KEY) @@ -9280,11 +11491,11 @@ static int build_roots_info_cache(struct btrfs_fs_info *info) rii->node_count++; } next: - path->slots[0]++; + path.slots[0]++; } out: - btrfs_free_path(path); + btrfs_release_path(&path); return ret; } @@ -9380,7 +11591,7 @@ static int maybe_repair_root_item(struct btrfs_fs_info *info, */ static int repair_root_items(struct btrfs_fs_info *info) { - struct btrfs_path *path = NULL; + struct btrfs_path path; struct btrfs_key key; struct extent_buffer *leaf; struct btrfs_trans_handle *trans = NULL; @@ -9388,16 +11599,12 @@ static int repair_root_items(struct btrfs_fs_info *info) int bad_roots = 0; int need_trans = 0; + btrfs_init_path(&path); + ret = build_roots_info_cache(info); if (ret) goto out; - path = btrfs_alloc_path(); - if 
(!path) { - ret = -ENOMEM; - goto out; - } - key.objectid = BTRFS_FIRST_FREE_OBJECTID; key.type = BTRFS_ROOT_ITEM_KEY; key.offset = 0; @@ -9416,19 +11623,19 @@ again: } } - ret = btrfs_search_slot(trans, info->tree_root, &key, path, + ret = btrfs_search_slot(trans, info->tree_root, &key, &path, 0, trans ? 1 : 0); if (ret < 0) goto out; - leaf = path->nodes[0]; + leaf = path.nodes[0]; while (1) { struct btrfs_key found_key; - if (path->slots[0] >= btrfs_header_nritems(leaf)) { - int no_more_keys = find_next_key(path, &key); + if (path.slots[0] >= btrfs_header_nritems(leaf)) { + int no_more_keys = find_next_key(&path, &key); - btrfs_release_path(path); + btrfs_release_path(&path); if (trans) { ret = btrfs_commit_transaction(trans, info->tree_root); @@ -9442,14 +11649,14 @@ again: goto again; } - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]); if (found_key.type != BTRFS_ROOT_ITEM_KEY) goto next; if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID) goto next; - ret = maybe_repair_root_item(info, path, &found_key, + ret = maybe_repair_root_item(info, &path, &found_key, trans ? 
0 : 1); if (ret < 0) goto out; @@ -9457,18 +11664,18 @@ again: if (!trans && repair) { need_trans = 1; key = found_key; - btrfs_release_path(path); + btrfs_release_path(&path); goto again; } bad_roots++; } next: - path->slots[0]++; + path.slots[0]++; } ret = 0; out: free_roots_info_cache(); - btrfs_free_path(path); + btrfs_release_path(&path); if (trans) btrfs_commit_transaction(trans, info->tree_root); if (ret < 0) @@ -9477,12 +11684,42 @@ out: return bad_roots; } +static int clear_free_space_cache(struct btrfs_fs_info *fs_info) +{ + struct btrfs_trans_handle *trans; + struct btrfs_block_group_cache *bg_cache; + u64 current = 0; + int ret = 0; + + /* Clear all free space cache inodes and its extent data */ + while (1) { + bg_cache = btrfs_lookup_first_block_group(fs_info, current); + if (!bg_cache) + break; + ret = btrfs_clear_free_space_cache(fs_info, bg_cache); + if (ret < 0) + return ret; + current = bg_cache->key.objectid + bg_cache->key.offset; + } + + /* Don't forget to set cache_generation to -1 */ + trans = btrfs_start_transaction(fs_info->tree_root, 0); + if (IS_ERR(trans)) { + error("failed to update super block cache generation"); + return PTR_ERR(trans); + } + btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1); + btrfs_commit_transaction(trans, fs_info->tree_root); + + return ret; +} + const char * const cmd_check_usage[] = { "btrfs check [options] ", - "Check structural inegrity of a filesystem (unmounted).", - "Check structural inegrity of an unmounted filesystem. Verify internal", + "Check structural integrity of a filesystem (unmounted).", + "Check structural integrity of an unmounted filesystem. Verify internal", "trees' consistency and item connectivity. In the repair mode try to", - "fix the problems found.", + "fix the problems found. 
", "WARNING: the repair mode is considered dangerous", "", "-s|--super use this superblock copy", @@ -9491,13 +11728,20 @@ const char * const cmd_check_usage[] = { "--readonly run in read-only mode (default)", "--init-csum-tree create a new CRC tree", "--init-extent-tree create a new extent tree", - "--check-data-csum verify checkums of data blocks", - "-Q|--qgroup-report print a report on qgroup consistency", + "--mode allows choice of memory/IO trade-offs", + " where MODE is one of:", + " original - read inodes and extents to memory (requires", + " more memory, does less IO)", + " lowmem - try to use less memory but read blocks again", + " when needed", + "--check-data-csum verify checksums of data blocks", + "-Q|--qgroup-report print a report on qgroup consistency", "-E|--subvol-extents ", " print subvolume extents and sharing state", "-r|--tree-root use the given bytenr for the tree root", "--chunk-root use the given bytenr for the chunk tree root", "-p|--progress indicate progress", + "--clear-space-cache v1|v2 clear space cache for v1 or v2", NULL }; @@ -9515,14 +11759,17 @@ int cmd_check(int argc, char **argv) u64 num; int init_csum_tree = 0; int readonly = 0; + int clear_space_cache = 0; int qgroup_report = 0; - enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE; + int qgroups_repaired = 0; + unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE; while(1) { int c; enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM, GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM, - GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE }; + GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE, + GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE }; static const struct option long_options[] = { { "super", required_argument, NULL, 's' }, { "repair", no_argument, NULL, GETOPT_VAL_REPAIR }, @@ -9540,6 +11787,10 @@ int cmd_check(int argc, char **argv) { "chunk-root", required_argument, NULL, GETOPT_VAL_CHUNK_TREE }, { "progress", no_argument, NULL, 'p' }, + { "mode", required_argument, NULL, + 
GETOPT_VAL_MODE }, + { "clear-space-cache", required_argument, NULL, + GETOPT_VAL_CLEAR_SPACE_CACHE}, { NULL, 0, NULL, 0} }; @@ -9554,8 +11805,8 @@ int cmd_check(int argc, char **argv) case 's': num = arg_strtou64(optarg); if (num >= BTRFS_SUPER_MIRROR_MAX) { - fprintf(stderr, - "ERROR: super mirror should be less than: %d\n", + error( + "super mirror should be less than %d", BTRFS_SUPER_MIRROR_MAX); exit(1); } @@ -9604,6 +11855,26 @@ int cmd_check(int argc, char **argv) case GETOPT_VAL_CHECK_CSUM: check_data_csum = 1; break; + case GETOPT_VAL_MODE: + check_mode = parse_check_mode(optarg); + if (check_mode == CHECK_MODE_UNKNOWN) { + error("unknown mode: %s", optarg); + exit(1); + } + break; + case GETOPT_VAL_CLEAR_SPACE_CACHE: + if (strcmp(optarg, "v1") == 0) { + clear_space_cache = 1; + } else if (strcmp(optarg, "v2") == 0) { + clear_space_cache = 2; + ctree_flags |= OPEN_CTREE_INVALIDATE_FST; + } else { + error( + "invalid argument to --clear-space-cache, must be v1 or v2"); + exit(1); + } + ctree_flags |= OPEN_CTREE_WRITES; + break; } } @@ -9617,7 +11888,15 @@ int cmd_check(int argc, char **argv) /* This check is the only reason for --readonly to exist */ if (readonly && repair) { - fprintf(stderr, "Repair options are not compatible with --readonly\n"); + error("repair options are not compatible with --readonly"); + exit(1); + } + + /* + * Not supported yet + */ + if (repair && check_mode == CHECK_MODE_LOWMEM) { + error("low memory mode doesn't support repair yet"); exit(1); } @@ -9625,10 +11904,10 @@ int cmd_check(int argc, char **argv) cache_tree_init(&root_cache); if((ret = check_mounted(argv[optind])) < 0) { - fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret)); + error("could not check mount status: %s", strerror(-ret)); goto err_out; } else if(ret) { - fprintf(stderr, "%s is currently mounted. 
Aborting.\n", argv[optind]); + error("%s is currently mounted, aborting", argv[optind]); ret = -EBUSY; goto err_out; } @@ -9640,27 +11919,61 @@ int cmd_check(int argc, char **argv) info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr, chunk_root_bytenr, ctree_flags); if (!info) { - fprintf(stderr, "Couldn't open file system\n"); + error("cannot open file system"); ret = -EIO; goto err_out; } global_info = info; root = info->fs_root; + if (clear_space_cache == 1) { + if (btrfs_fs_compat_ro(info, + BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) { + error( + "free space cache v2 detected, use --clear-space-cache v2"); + ret = 1; + goto close_out; + } + printf("Clearing free space cache\n"); + ret = clear_free_space_cache(info); + if (ret) { + error("failed to clear free space cache"); + ret = 1; + } else { + printf("Free space cache cleared\n"); + } + goto close_out; + } else if (clear_space_cache == 2) { + if (!btrfs_fs_compat_ro(info, + BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) { + printf("no free space cache v2 to clear\n"); + ret = 0; + goto close_out; + } + printf("Clear free space cache v2\n"); + ret = btrfs_clear_free_space_tree(info); + if (ret) { + error("failed to clear free space cache v2: %d", ret); + ret = 1; + } else { + printf("free space cache v2 cleared\n"); + } + goto close_out; + } /* * repair mode will force us to commit transaction which * will make us fail to load log tree when mounting. 
*/ if (repair && btrfs_super_log_root(info->super_copy)) { - ret = ask_user("repair mode will force to clear out log tree, Are you sure?"); + ret = ask_user("repair mode will force to clear out log tree, are you sure?"); if (!ret) { ret = 1; goto close_out; } ret = zero_log_tree(root); if (ret) { - fprintf(stderr, "fail to zero log tree\n"); + error("failed to zero log tree: %d", ret); goto close_out; } } @@ -9671,7 +11984,7 @@ int cmd_check(int argc, char **argv) uuidbuf); ret = qgroup_verify_all(info); if (ret == 0) - print_qgroup_report(1); + report_qgroups(1); goto close_out; } if (subvolid) { @@ -9685,7 +11998,7 @@ int cmd_check(int argc, char **argv) if (!extent_buffer_uptodate(info->tree_root->node) || !extent_buffer_uptodate(info->dev_root->node) || !extent_buffer_uptodate(info->chunk_root->node)) { - fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n"); + error("critical roots corrupted, unable to check the filesystem"); ret = -EIO; goto close_out; } @@ -9695,7 +12008,7 @@ int cmd_check(int argc, char **argv) trans = btrfs_start_transaction(info->extent_root, 0); if (IS_ERR(trans)) { - fprintf(stderr, "Error starting transaction\n"); + error("error starting transaction"); ret = PTR_ERR(trans); goto close_out; } @@ -9708,10 +12021,11 @@ int cmd_check(int argc, char **argv) } if (init_csum_tree) { - fprintf(stderr, "Reinit crc root\n"); + printf("Reinitialize checksum tree\n"); ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0); if (ret) { - fprintf(stderr, "crc root initialization failed\n"); + error("checksum tree initialization failed: %d", + ret); ret = -EIO; goto close_out; } @@ -9719,7 +12033,7 @@ int cmd_check(int argc, char **argv) ret = fill_csum_tree(trans, info->csum_root, init_extent_tree); if (ret) { - fprintf(stderr, "crc refilling failed\n"); + error("checksum tree refilling failed: %d", ret); return -EIO; } } @@ -9732,21 +12046,25 @@ int cmd_check(int argc, char **argv) goto close_out; } if 
(!extent_buffer_uptodate(info->extent_root->node)) { - fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n"); + error("critical: extent_root, unable to check the filesystem"); ret = -EIO; goto close_out; } if (!extent_buffer_uptodate(info->csum_root->node)) { - fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n"); + error("critical: csum_root, unable to check the filesystem"); ret = -EIO; goto close_out; } if (!ctx.progress_enabled) fprintf(stderr, "checking extents\n"); - ret = check_chunks_and_extents(root); + if (check_mode == CHECK_MODE_LOWMEM) + ret = check_chunks_and_extents_v2(root); + else + ret = check_chunks_and_extents(root); if (ret) - fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n"); + error( + "errors found in extent allocation tree or chunk allocation"); ret = repair_root_items(info); if (ret < 0) @@ -9825,14 +12143,21 @@ int cmd_check(int argc, char **argv) err = qgroup_verify_all(info); if (err) goto out; + report_qgroups(0); + err = repair_qgroups(info, &qgroups_repaired); + if (err) + goto out; } if (!list_empty(&root->fs_info->recow_ebs)) { - fprintf(stderr, "Transid errors in file system\n"); + error("transid errors in file system"); ret = 1; } out: - print_qgroup_report(0); + /* Don't override original ret */ + if (!ret && qgroups_repaired) + ret = qgroups_repaired; + if (found_old_backref) { /* * there was a disk format change when mixed * backref was in testing tree. The old format @@ -9859,6 +12184,7 @@ out: (unsigned long long)data_bytes_allocated, (unsigned long long)data_bytes_referenced); + free_qgroup_counts(); free_root_recs_tree(&root_cache); close_out: close_ctree(root);