X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=cmds-check.c;h=c40e5da37cc896e2d3ba83a1dc2202b54e7c6f11;hb=bfecd8bd6aafd3be183506e4a351d412f39fc2bc;hp=15e449563b4d9928f176dcc598f610b85cd057e8;hpb=16584ea51eeb836f6a82b44d15cbc40de1e4da5f;p=platform%2Fupstream%2Fbtrfs-progs.git diff --git a/cmds-check.c b/cmds-check.c index 15e4495..c40e5da 100644 --- a/cmds-check.c +++ b/cmds-check.c @@ -30,16 +30,32 @@ #include "repair.h" #include "disk-io.h" #include "print-tree.h" +#include "task-utils.h" #include "transaction.h" #include "utils.h" #include "commands.h" #include "free-space-cache.h" +#include "free-space-tree.h" #include "btrfsck.h" #include "qgroup-verify.h" #include "rbtree-utils.h" #include "backref.h" #include "ulist.h" +enum task_position { + TASK_EXTENTS, + TASK_FREE_SPACE, + TASK_FS_ROOTS, + TASK_NOTHING, /* have to be the last element */ +}; + +struct task_ctx { + int progress_enabled; + enum task_position tp; + + struct task_info *info; +}; + static u64 bytes_used = 0; static u64 total_csum_bytes = 0; static u64 total_btree_bytes = 0; @@ -51,13 +67,16 @@ static u64 data_bytes_referenced = 0; static int found_old_backref = 0; static LIST_HEAD(duplicate_extents); static LIST_HEAD(delete_items); -static int repair = 0; static int no_holes = 0; static int init_extent_tree = 0; static int check_data_csum = 0; +static int low_memory = 0; +static struct btrfs_fs_info *global_info; +static struct task_ctx ctx = { 0 }; +static struct cache_tree *roots_info_cache = NULL; struct extent_backref { - struct list_head list; + struct rb_node node; unsigned int is_data:1; unsigned int found_extent_tree:1; unsigned int full_backref:1; @@ -65,6 +84,11 @@ struct extent_backref { unsigned int broken:1; }; +static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node) +{ + return rb_entry(node, struct extent_backref, node); +} + struct data_backref { struct extent_backref node; union { @@ -80,6 +104,61 @@ struct data_backref { u32 found_ref; }; +static inline struct data_backref* to_data_backref(struct extent_backref *back) +{ + return container_of(back, struct data_backref, node); +} + +static int compare_data_backref(struct rb_node *node1, struct rb_node *node2) +{ + struct extent_backref *ext1 = rb_node_to_extent_backref(node1); + struct extent_backref *ext2 = rb_node_to_extent_backref(node2); + struct data_backref *back1 = to_data_backref(ext1); + struct data_backref *back2 = to_data_backref(ext2); + + WARN_ON(!ext1->is_data); + WARN_ON(!ext2->is_data); + + /* parent and root are a union, so this covers both */ + if (back1->parent > back2->parent) + return 1; + if (back1->parent < back2->parent) + return -1; + + /* This is a full backref and the parents match. */ + if (back1->node.full_backref) + return 0; + + if (back1->owner > back2->owner) + return 1; + if (back1->owner < back2->owner) + return -1; + + if (back1->offset > back2->offset) + return 1; + if (back1->offset < back2->offset) + return -1; + + if (back1->bytes > back2->bytes) + return 1; + if (back1->bytes < back2->bytes) + return -1; + + if (back1->found_ref && back2->found_ref) { + if (back1->disk_bytenr > back2->disk_bytenr) + return 1; + if (back1->disk_bytenr < back2->disk_bytenr) + return -1; + + if (back1->found_ref > back2->found_ref) + return 1; + if (back1->found_ref < back2->found_ref) + return -1; + } + + return 0; +} + /* * Much like data_backref, just removed the undetermined members * and change it to use list_head. @@ -103,9 +182,59 @@ struct tree_backref { }; }; +static inline struct tree_backref* to_tree_backref(struct extent_backref *back) +{ + return container_of(back, struct tree_backref, node); +} + +static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2) +{ + struct extent_backref *ext1 = rb_node_to_extent_backref(node1); + struct extent_backref *ext2 = rb_node_to_extent_backref(node2); + struct tree_backref *back1 = to_tree_backref(ext1); + struct tree_backref *back2 = to_tree_backref(ext2); + + WARN_ON(ext1->is_data); + WARN_ON(ext2->is_data); + + /* parent and root are a union, so this covers both */ + if (back1->parent > back2->parent) + return 1; + if (back1->parent < back2->parent) + return -1; + + return 0; +} + +static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2) +{ + struct extent_backref *ext1 = rb_node_to_extent_backref(node1); + struct extent_backref *ext2 = rb_node_to_extent_backref(node2); + + if (ext1->is_data > ext2->is_data) + return 1; + + if (ext1->is_data < ext2->is_data) + return -1; + + if (ext1->full_backref > ext2->full_backref) + return 1; + if (ext1->full_backref < ext2->full_backref) + return -1; + + if (ext1->is_data) + return compare_data_backref(node1, node2); + else + return compare_tree_backref(node1, node2); +} + +/* Explicit initialization for extent_record::flag_block_full_backref */ +enum { FLAG_UNSET = 2 }; + struct extent_record { struct list_head backrefs; struct list_head dups; + struct rb_root backref_tree; struct list_head list; struct cache_extent cache; struct btrfs_disk_key parent_key; @@ -119,14 +248,22 @@ struct extent_record { u64 info_objectid; u32 num_duplicates; u8 info_level; + unsigned int flag_block_full_backref:2; unsigned int found_rec:1; unsigned int content_checked:1; unsigned int owner_ref_checked:1; unsigned int is_root:1; unsigned int metadata:1; - unsigned int flag_block_full_backref:1; + unsigned int bad_full_backref:1; + unsigned int crossing_stripes:1; + unsigned int wrong_chunk_type:1; }; +static inline struct extent_record* to_extent_record(struct list_head *entry) +{ + return container_of(entry, struct extent_record, list); +} + struct inode_backref { struct list_head list; unsigned int found_dir_item:1; @@ -141,10 +278,16 @@ struct inode_backref { char name[0]; }; +static inline struct inode_backref* to_inode_backref(struct list_head *entry) +{ + return list_entry(entry, struct inode_backref, list); +} + struct root_item_record { struct list_head list; u64 objectid; u64 bytenr; + u64 last_snapshot; u8 level; u8 drop_level; int level_size; @@ -171,6 +314,178 @@ struct file_extent_hole { u64 len; }; +struct inode_record { + struct list_head backrefs; + unsigned int checked:1; + unsigned int merging:1; + unsigned int found_inode_item:1; + unsigned int found_dir_item:1; + unsigned int found_file_extent:1; + unsigned int found_csum_item:1; + unsigned int some_csum_missing:1; + unsigned int nodatasum:1; + int errors; + + u64 ino; + u32 nlink; + u32 imode; + u64 isize; + u64 nbytes; + + u32 found_link; + u64 found_size; + u64 extent_start; + u64 extent_end; + struct rb_root holes; + struct list_head orphan_extents; + + u32 refs; +}; + +#define I_ERR_NO_INODE_ITEM (1 << 0) +#define I_ERR_NO_ORPHAN_ITEM (1 << 1) +#define I_ERR_DUP_INODE_ITEM (1 << 2) +#define I_ERR_DUP_DIR_INDEX (1 << 3) +#define I_ERR_ODD_DIR_ITEM (1 << 4) +#define I_ERR_ODD_FILE_EXTENT (1 << 5) +#define I_ERR_BAD_FILE_EXTENT (1 << 6) +#define I_ERR_FILE_EXTENT_OVERLAP (1 << 7) +#define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100 +#define I_ERR_DIR_ISIZE_WRONG (1 << 9) +#define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400 +#define I_ERR_ODD_CSUM_ITEM (1 << 11) +#define I_ERR_SOME_CSUM_MISSING (1 << 12) +#define I_ERR_LINK_COUNT_WRONG (1 << 13) +#define I_ERR_FILE_EXTENT_ORPHAN (1 << 14) + +struct root_backref { + struct list_head list; + unsigned int found_dir_item:1; + unsigned int found_dir_index:1; + unsigned int found_back_ref:1; + unsigned int found_forward_ref:1; + unsigned int reachable:1; + int errors; + u64 ref_root; + u64 dir; + u64 index; + u16 namelen; + char name[0]; +}; + +static inline struct root_backref* to_root_backref(struct list_head *entry) +{ + return list_entry(entry, struct root_backref, list); +} + +struct root_record { + struct list_head backrefs; + struct cache_extent cache; + unsigned int found_root_item:1; + u64 objectid; + u32 found_ref; +}; + +struct ptr_node { + struct cache_extent cache; + void *data; +}; + +struct shared_node { + struct cache_extent cache; + struct cache_tree root_cache; + struct cache_tree inode_cache; + struct inode_record *current; + u32 refs; +}; + +struct block_info { + u64 start; + u32 size; +}; + +struct walk_control { + struct cache_tree shared; + struct shared_node *nodes[BTRFS_MAX_LEVEL]; + int active_node; + int root_level; +}; + +struct bad_item { + struct btrfs_key key; + u64 root_id; + struct list_head list; +}; + +struct extent_entry { + u64 bytenr; + u64 bytes; + int count; + int broken; + struct list_head list; +}; + +struct root_item_info { + /* level of the root */ + u8 level; + /* number of nodes at this level, must be 1 for a root */ + int node_count; + u64 bytenr; + u64 gen; + struct cache_extent cache_extent; +}; + +/* + * Error bit for low memory mode check. + * + * Currently no caller cares about it yet. Just internal use for error + * classification. + */ +#define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */ +#define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */ +#define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */ +#define REFERENCER_MISSING (1 << 3) /* Referencer not found */ +#define REFERENCER_MISMATCH (1 << 4) /* Referenceer found but does not match */ +#define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */ +#define ITEM_SIZE_MISMATCH (1 << 5) /* Bad item size */ +#define UNKNOWN_TYPE (1 << 6) /* Unknown type */ +#define ACCOUNTING_MISMATCH (1 << 7) /* Used space accounting error */ +#define CHUNK_TYPE_MISMATCH (1 << 8) + +static void *print_status_check(void *p) +{ + struct task_ctx *priv = p; + const char work_indicator[] = { '.', 'o', 'O', 'o' }; + uint32_t count = 0; + static char *task_position_string[] = { + "checking extents", + "checking free space cache", + "checking fs roots", + }; + + task_period_start(priv->info, 1000 /* 1s */); + + if (priv->tp == TASK_NOTHING) + return NULL; + + while (1) { + printf("%s [%c]\r", task_position_string[priv->tp], + work_indicator[count % 4]); + count++; + fflush(stdout); + task_period_wait(priv->info); + } + return NULL; +} + +static int print_status_return(void *p) +{ + printf("\n"); + fflush(stdout); + + return 0; +} + /* Compatible function to allow reuse of old codes */ static u64 first_extent_gap(struct rb_root *holes) { @@ -183,7 +498,7 @@ static u64 first_extent_gap(struct rb_root *holes) return hole->start; } -int compare_hole(struct rb_node *node1, struct rb_node *node2) +static int compare_hole(struct rb_node *node1, struct rb_node *node2) { struct file_extent_hole *hole1; struct file_extent_hole *hole2; @@ -284,8 +599,10 @@ static int del_file_extent_hole(struct rb_root *holes, { struct file_extent_hole *hole; struct file_extent_hole tmp; - struct file_extent_hole prev; - struct file_extent_hole next; + u64 prev_start = 0; + u64 prev_len = 0; + u64 next_start = 0; + u64 next_len = 0; struct rb_node *node; int have_prev = 0; int have_next = 0; @@ -301,28 +618,28 @@ static int del_file_extent_hole(struct rb_root *holes, return -EEXIST; /* - * Now there will be no overflap, delete the hole and re-add the + * Now there will be no overlap, delete the hole and re-add the * split(s) if they exists. */ if (start > hole->start) { - prev.start = hole->start; - prev.len = start - hole->start; + prev_start = hole->start; + prev_len = start - hole->start; have_prev = 1; } if (hole->start + hole->len > start + len) { - next.start = start + len; - next.len = hole->start + hole->len - start - len; + next_start = start + len; + next_len = hole->start + hole->len - start - len; have_next = 1; } rb_erase(node, holes); free(hole); if (have_prev) { - ret = add_file_extent_hole(holes, prev.start, prev.len); + ret = add_file_extent_hole(holes, prev_start, prev_len); if (ret < 0) return ret; } if (have_next) { - ret = add_file_extent_hole(holes, next.start, next.len); + ret = add_file_extent_hole(holes, next_start, next_len); if (ret < 0) return ret; } @@ -361,133 +678,35 @@ static void free_file_extent_holes(struct rb_root *holes) } } -struct inode_record { - struct list_head backrefs; - unsigned int checked:1; - unsigned int merging:1; - unsigned int found_inode_item:1; - unsigned int found_dir_item:1; - unsigned int found_file_extent:1; - unsigned int found_csum_item:1; - unsigned int some_csum_missing:1; - unsigned int nodatasum:1; - int errors; +static void reset_cached_block_groups(struct btrfs_fs_info *fs_info); - u64 ino; - u32 nlink; - u32 imode; - u64 isize; - u64 nbytes; +static void record_root_in_trans(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + if (root->last_trans != trans->transid) { + root->track_dirty = 1; + root->last_trans = trans->transid; + root->commit_root = root->node; + extent_buffer_get(root->node); + } +} - u32 found_link; - u64 found_size; - u64 extent_start; - u64 extent_end; - struct rb_root holes; - struct list_head orphan_extents; +static u8 imode_to_type(u32 imode) +{ +#define S_SHIFT 12 + static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { + [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE, + [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR, + [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV, + [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV, + [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO, + [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK, + [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, + }; - u32 refs; -}; - -#define I_ERR_NO_INODE_ITEM (1 << 0) -#define I_ERR_NO_ORPHAN_ITEM (1 << 1) -#define I_ERR_DUP_INODE_ITEM (1 << 2) -#define I_ERR_DUP_DIR_INDEX (1 << 3) -#define I_ERR_ODD_DIR_ITEM (1 << 4) -#define I_ERR_ODD_FILE_EXTENT (1 << 5) -#define I_ERR_BAD_FILE_EXTENT (1 << 6) -#define I_ERR_FILE_EXTENT_OVERLAP (1 << 7) -#define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100 -#define I_ERR_DIR_ISIZE_WRONG (1 << 9) -#define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400 -#define I_ERR_ODD_CSUM_ITEM (1 << 11) -#define I_ERR_SOME_CSUM_MISSING (1 << 12) -#define I_ERR_LINK_COUNT_WRONG (1 << 13) -#define I_ERR_FILE_EXTENT_ORPHAN (1 << 14) - -struct root_backref { - struct list_head list; - unsigned int found_dir_item:1; - unsigned int found_dir_index:1; - unsigned int found_back_ref:1; - unsigned int found_forward_ref:1; - unsigned int reachable:1; - int errors; - u64 ref_root; - u64 dir; - u64 index; - u16 namelen; - char name[0]; -}; - -struct root_record { - struct list_head backrefs; - struct cache_extent cache; - unsigned int found_root_item:1; - u64 objectid; - u32 found_ref; -}; - -struct ptr_node { - struct cache_extent cache; - void *data; -}; - -struct shared_node { - struct cache_extent cache; - struct cache_tree root_cache; - struct cache_tree inode_cache; - struct inode_record *current; - u32 refs; -}; - -struct block_info { - u64 start; - u32 size; -}; - -struct walk_control { - struct cache_tree shared; - struct shared_node *nodes[BTRFS_MAX_LEVEL]; - int active_node; - int root_level; -}; - -struct bad_item { - struct btrfs_key key; - u64 root_id; - struct list_head list; -}; - -static void reset_cached_block_groups(struct btrfs_fs_info *fs_info); - -static void record_root_in_trans(struct btrfs_trans_handle *trans, - struct btrfs_root *root) -{ - if (root->last_trans != trans->transid) { - root->track_dirty = 1; - root->last_trans = trans->transid; - root->commit_root = root->node; - extent_buffer_get(root->node); - } -} - -static u8 imode_to_type(u32 imode) -{ -#define S_SHIFT 12 - static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { - [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE, - [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR, - [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV, - [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV, - [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO, - [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK, - [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, - }; - - return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT]; -#undef S_SHIFT -} + return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT]; +#undef S_SHIFT +} static int device_record_compare(struct rb_node *node1, struct rb_node *node2) { @@ -509,30 +728,61 @@ static struct inode_record *clone_inode_rec(struct inode_record *orig_rec) struct inode_record *rec; struct inode_backref *backref; struct inode_backref *orig; + struct inode_backref *tmp; struct orphan_data_extent *src_orphan; struct orphan_data_extent *dst_orphan; size_t size; + int ret; rec = malloc(sizeof(*rec)); + if (!rec) + return ERR_PTR(-ENOMEM); memcpy(rec, orig_rec, sizeof(*rec)); rec->refs = 1; INIT_LIST_HEAD(&rec->backrefs); INIT_LIST_HEAD(&rec->orphan_extents); + rec->holes = RB_ROOT; list_for_each_entry(orig, &orig_rec->backrefs, list) { size = sizeof(*orig) + orig->namelen + 1; backref = malloc(size); + if (!backref) { + ret = -ENOMEM; + goto cleanup; + } memcpy(backref, orig, size); list_add_tail(&backref->list, &rec->backrefs); } list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) { dst_orphan = malloc(sizeof(*dst_orphan)); - /* TODO: Fix all the HELL of un-catched -ENOMEM case */ - BUG_ON(!dst_orphan); + if (!dst_orphan) { + ret = -ENOMEM; + goto cleanup; + } memcpy(dst_orphan, src_orphan, sizeof(*src_orphan)); list_add_tail(&dst_orphan->list, &rec->orphan_extents); } + ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes); + BUG_ON(ret < 0); + return rec; + +cleanup: + if (!list_empty(&rec->backrefs)) + list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) { + list_del(&orig->list); + free(orig); + } + + if (!list_empty(&rec->orphan_extents)) + list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) { + list_del(&orig->list); + free(orig); + } + + free(rec); + + return ERR_PTR(ret); } static void print_orphan_data_extents(struct list_head *orphan_extents, @@ -606,15 +856,20 @@ static void print_inode_error(struct btrfs_root *root, struct inode_record *rec) if (errors & I_ERR_FILE_EXTENT_DISCOUNT) { struct file_extent_hole *hole; struct rb_node *node; + int found = 0; node = rb_first(&rec->holes); fprintf(stderr, "Found file extent holes:\n"); while (node) { + found = 1; hole = rb_entry(node, struct file_extent_hole, node); - fprintf(stderr, "\tstart: %llu, len:%llu\n", + fprintf(stderr, "\tstart: %llu, len: %llu\n", hole->start, hole->len); node = rb_next(node); } + if (!found) + fprintf(stderr, "\tstart: 0, len: %llu\n", + round_up(rec->isize, root->sectorsize)); } } @@ -633,9 +888,9 @@ static void print_ref_error(int errors) if (errors & REF_ERR_DUP_INODE_REF) fprintf(stderr, ", dup inode ref"); if (errors & REF_ERR_INDEX_UNMATCH) - fprintf(stderr, ", index unmatch"); + fprintf(stderr, ", index mismatch"); if (errors & REF_ERR_FILETYPE_UNMATCH) - fprintf(stderr, ", filetype unmatch"); + fprintf(stderr, ", filetype mismatch"); if (errors & REF_ERR_NAME_TOO_LONG) fprintf(stderr, ", name too long"); if (errors & REF_ERR_NO_ROOT_REF) @@ -663,11 +918,15 @@ static struct inode_record *get_inode_rec(struct cache_tree *inode_cache, rec = node->data; if (mod && rec->refs > 1) { node->data = clone_inode_rec(rec); + if (IS_ERR(node->data)) + return node->data; rec->refs--; rec = node->data; } } else if (mod) { rec = calloc(1, sizeof(*rec)); + if (!rec) + return ERR_PTR(-ENOMEM); rec->ino = ino; rec->extent_start = (u64)-1; rec->refs = 1; @@ -676,6 +935,10 @@ static struct inode_record *get_inode_rec(struct cache_tree *inode_cache, rec->holes = RB_ROOT; node = malloc(sizeof(*node)); + if (!node) { + free(rec); + return ERR_PTR(-ENOMEM); + } node->cache.start = ino; node->cache.size = 1; node->data = rec; @@ -684,7 +947,8 @@ static struct inode_record *get_inode_rec(struct cache_tree *inode_cache, rec->found_link = 1; ret = insert_cache_extent(inode_cache, &node->cache); - BUG_ON(ret); + if (ret) + return ERR_PTR(-EEXIST); } return rec; } @@ -709,8 +973,7 @@ static void free_inode_rec(struct inode_record *rec) return; while (!list_empty(&rec->backrefs)) { - backref = list_entry(rec->backrefs.next, - struct inode_backref, list); + backref = to_inode_backref(rec->backrefs.next); list_del(&backref->list); free(backref); } @@ -743,7 +1006,8 @@ static void maybe_free_inode_rec(struct cache_tree *inode_cache, if (backref->found_dir_item && backref->found_dir_index) { if (backref->filetype != filetype) backref->errors |= REF_ERR_FILETYPE_UNMATCH; - if (!backref->errors && backref->found_inode_ref) { + if (!backref->errors && backref->found_inode_ref && + rec->nlink == rec->found_link) { list_del(&backref->list); free(backref); } @@ -849,6 +1113,8 @@ static struct inode_backref *get_inode_backref(struct inode_record *rec, } backref = malloc(sizeof(*backref) + namelen + 1); + if (!backref) + return NULL; memset(backref, 0, sizeof(*backref)); backref->dir = dir; backref->namelen = namelen; @@ -867,7 +1133,9 @@ static int add_inode_backref(struct cache_tree *inode_cache, struct inode_backref *backref; rec = get_inode_rec(inode_cache, ino, 1); + BUG_ON(IS_ERR(rec)); backref = get_inode_backref(rec, name, namelen, dir); + BUG_ON(!backref); if (errors) backref->errors |= errors; if (itemtype == BTRFS_DIR_INDEX_KEY) { @@ -1021,6 +1289,7 @@ again: ins = node; } else { ins = malloc(sizeof(*ins)); + BUG_ON(!ins); ins->cache.start = node->cache.start; ins->cache.size = node->cache.size; ins->data = rec; @@ -1029,6 +1298,7 @@ again: ret = insert_cache_extent(dst, &ins->cache); if (ret == -EEXIST) { conflict = get_inode_rec(dst, rec->ino, 1); + BUG_ON(IS_ERR(conflict)); merge_inode_recs(rec, conflict, dst); if (rec->checked) { conflict->checked = 1; @@ -1056,6 +1326,7 @@ again: maybe_free_inode_rec(dst, dst_node->current); } dst_node->current = get_inode_rec(dst, current_ino, 1); + BUG_ON(IS_ERR(dst_node->current)); } return 0; } @@ -1093,6 +1364,8 @@ static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs) struct shared_node *node; node = calloc(1, sizeof(*node)); + if (!node) + return -ENOMEM; node->cache.start = bytenr; node->cache.size = 1; cache_tree_init(&node->root_cache); @@ -1100,8 +1373,8 @@ static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs) node->refs = refs; ret = insert_cache_extent(shared, &node->cache); - BUG_ON(ret); - return 0; + + return ret; } static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs, @@ -1109,6 +1382,7 @@ static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs, { struct shared_node *node; struct shared_node *dest; + int ret; if (level == wc->active_node) return 0; @@ -1116,7 +1390,8 @@ static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs, BUG_ON(wc->active_node <= level); node = find_shared_node(&wc->shared, bytenr); if (!node) { - add_shared_node(&wc->shared, bytenr, refs); + ret = add_shared_node(&wc->shared, bytenr, refs); + BUG_ON(ret); node = find_shared_node(&wc->shared, bytenr); wc->nodes[level] = node; wc->active_node = level; @@ -1530,7 +1805,16 @@ static int process_file_extent(struct btrfs_root *root, } rec->extent_end = key->offset + num_bytes; - if (disk_bytenr > 0) { + /* + * The data reloc tree will copy full extents into its inode and then + * copy the corresponding csums. Because the extent it copied could be + * a preallocated extent that hasn't been written to yet there may be no + * csums to copy, ergo we won't have csums for our file extent. This is + * ok so just don't bother checking csums if the inode belongs to the + * data reloc tree. + */ + if (disk_bytenr > 0 && + btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) { u64 found; if (btrfs_file_extent_compression(eb, fi)) num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); @@ -1587,6 +1871,7 @@ static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb, } active_node->current = get_inode_rec(inode_cache, key.objectid, 1); + BUG_ON(IS_ERR(active_node->current)); } switch (key.type) { case BTRFS_DIR_ITEM_KEY: @@ -1628,7 +1913,7 @@ static void reada_walk_down(struct btrfs_root *root, return; nritems = btrfs_header_nritems(node); - blocksize = btrfs_level_size(root, level - 1); + blocksize = root->nodesize; for (i = slot; i < nritems; i++) { bytenr = btrfs_node_blockptr(node, i); ptr_gen = btrfs_node_ptr_generation(node, i); @@ -1686,8 +1971,14 @@ static int check_child_node(struct btrfs_root *root, return ret; } +struct node_refs { + u64 bytenr[BTRFS_MAX_LEVEL]; + u64 refs[BTRFS_MAX_LEVEL]; +}; + static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, - struct walk_control *wc, int *level) + struct walk_control *wc, int *level, + struct node_refs *nrefs) { enum btrfs_tree_block_status status; u64 bytenr; @@ -1700,12 +1991,20 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); - ret = btrfs_lookup_extent_info(NULL, root, + + if (path->nodes[*level]->start == nrefs->bytenr[*level]) { + refs = nrefs->refs[*level]; + ret = 0; + } else { + ret = btrfs_lookup_extent_info(NULL, root, path->nodes[*level]->start, *level, 1, &refs, NULL); - if (ret < 0) { - err = ret; - goto out; + if (ret < 0) { + err = ret; + goto out; + } + nrefs->bytenr[*level] = path->nodes[*level]->start; + nrefs->refs[*level] = refs; } if (refs > 1) { @@ -1735,11 +2034,20 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, } bytenr = btrfs_node_blockptr(cur, path->slots[*level]); ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); - blocksize = btrfs_level_size(root, *level - 1); - ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1, - 1, &refs, NULL); - if (ret < 0) - refs = 0; + blocksize = root->nodesize; + + if (bytenr == nrefs->bytenr[*level - 1]) { + refs = nrefs->refs[*level - 1]; + } else { + ret = btrfs_lookup_extent_info(NULL, root, bytenr, + *level - 1, 1, &refs, NULL); + if (ret < 0) { + refs = 0; + } else { + nrefs->bytenr[*level - 1] = bytenr; + nrefs->refs[*level - 1] = refs; + } + } if (refs > 1) { ret = enter_shared_node(root, bytenr, refs, @@ -1756,7 +2064,7 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, reada_walk_down(root, cur, path->slots[*level]); next = read_tree_block(root, bytenr, blocksize, ptr_gen); - if (!next) { + if (!extent_buffer_uptodate(next)) { struct btrfs_key node_key; btrfs_node_key_to_cpu(path->nodes[*level], @@ -1765,7 +2073,7 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, btrfs_add_corrupt_extent_record(root->fs_info, &node_key, path->nodes[*level]->start, - root->leafsize, *level); + root->nodesize, *level); err = -EIO; goto out; } @@ -1832,7 +2140,7 @@ static int check_root_dir(struct inode_record *rec) goto out; if (list_empty(&rec->backrefs)) goto out; - backref = list_entry(rec->backrefs.next, struct inode_backref, list); + backref = to_inode_backref(rec->backrefs.next); if (!backref->found_inode_ref) goto out; if (backref->index != 0 || backref->namelen != 2 || @@ -1900,6 +2208,39 @@ static int repair_inode_orphan_item(struct btrfs_trans_handle *trans, return ret; } +static int repair_inode_nbytes(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct inode_record *rec) +{ + struct btrfs_inode_item *ei; + struct btrfs_key key; + int ret = 0; + + key.objectid = rec->ino; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret) { + if (ret > 0) + ret = -ENOENT; + goto out; + } + + /* Since ret == 0, no need to check anything */ + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size); + btrfs_mark_buffer_dirty(path->nodes[0]); + rec->errors &= ~I_ERR_FILE_NBYTES_WRONG; + printf("reset nbytes for ino %llu root %llu\n", + rec->ino, root->root_key.objectid); +out: + btrfs_release_path(path); + return ret; +} + static int add_missing_dir_index(struct btrfs_root *root, struct cache_tree *inode_cache, struct inode_record *rec, @@ -1954,6 +2295,7 @@ static int add_missing_dir_index(struct btrfs_root *root, backref->found_dir_index = 1; dir_rec = get_inode_rec(inode_cache, backref->dir, 0); + BUG_ON(IS_ERR(dir_rec)); if (!dir_rec) return 0; dir_rec->found_size += backref->namelen; @@ -2283,7 +2625,7 @@ static int reset_nlink(struct btrfs_trans_handle *trans, list_for_each_entry(backref, &rec->backrefs, list) { ret = btrfs_add_link(trans, root, rec->ino, backref->dir, backref->name, backref->namelen, - backref->ref_type, &backref->index, 1); + backref->filetype, &backref->index, 1); if (ret < 0) goto out; } @@ -2347,13 +2689,17 @@ static int repair_inode_nlinks(struct btrfs_trans_handle *trans, BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino, mode); if (ret < 0) { - fprintf(stderr, "Failed to create '%s' dir: %s", + fprintf(stderr, "Failed to create '%s' dir: %s\n", dir_name, strerror(-ret)); goto out; } ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino, namebuf, namelen, type, NULL, 1); - if (ret == -EEXIST) { + /* + * Add ".INO" suffix several times to handle case where + * "FILENAME.INO" is already taken by another file. + */ + while (ret == -EEXIST) { /* * Conflicting file name, add ".INO" as suffix * +1 for '.' */ @@ -2371,7 +2717,7 @@ static int repair_inode_nlinks(struct btrfs_trans_handle *trans, } if (ret < 0) { fprintf(stderr, - "Failed to link the inode %llu to %s dir: %s", + "Failed to link the inode %llu to %s dir: %s\n", rec->ino, dir_name, strerror(-ret)); goto out; } @@ -2385,9 +2731,14 @@ static int repair_inode_nlinks(struct btrfs_trans_handle *trans, printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n", namelen, namebuf, dir_name); } - rec->errors &= ~I_ERR_LINK_COUNT_WRONG; printf("Fixed the nlink of inode %llu\n", rec->ino); out: + /* + * Clear the flag anyway, or we will loop forever for the same inode + * as it will not be removed from the bad inode list and the dead loop + * happens. + */ + rec->errors &= ~I_ERR_LINK_COUNT_WRONG; btrfs_release_path(path); return ret; } @@ -2502,7 +2853,7 @@ static int repair_inode_no_item(struct btrfs_trans_handle *trans, type_recovered = 1; filetype = BTRFS_FT_REG_FILE; } else{ - printf("Can't determint the filetype for inode %llu, assume it is a normal file\n", + printf("Can't determine the filetype for inode %llu, assume it is a normal file\n", rec->ino); type_recovered = 1; filetype = BTRFS_FT_REG_FILE; @@ -2599,11 +2950,13 @@ static int repair_inode_discount_extent(struct btrfs_trans_handle *trans, { struct rb_node *node; struct file_extent_hole *hole; + int found = 0; int ret = 0; node = rb_first(&rec->holes); while (node) { + found = 1; hole = rb_entry(node, struct file_extent_hole, node); ret = btrfs_punch_hole(trans, root, rec->ino, hole->start, hole->len); @@ -2617,6 +2970,13 @@ static int repair_inode_discount_extent(struct btrfs_trans_handle *trans, rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT; node = rb_first(&rec->holes); } + /* special case for a file losing all its file extent */ + if (!found) { + ret = btrfs_punch_hole(trans, root, rec->ino, 0, + round_up(rec->isize, root->sectorsize)); + if (ret < 0) + goto out; + } printf("Fixed discount file extents for inode: %llu in root: %llu\n", rec->ino, root->objectid); out: @@ -2634,7 +2994,8 @@ static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec) I_ERR_LINK_COUNT_WRONG | I_ERR_NO_INODE_ITEM | I_ERR_FILE_EXTENT_ORPHAN | - I_ERR_FILE_EXTENT_DISCOUNT))) + I_ERR_FILE_EXTENT_DISCOUNT| + I_ERR_FILE_NBYTES_WRONG))) return rec->errors; path = btrfs_alloc_path(); @@ -2666,6 +3027,8 @@ static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec) ret = repair_inode_orphan_item(trans, root, path, rec); if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG) ret = repair_inode_nlinks(trans, root, path, rec); + if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG) + ret = repair_inode_nbytes(trans, root, path, rec); btrfs_commit_transaction(trans, root); btrfs_free_path(path); return ret; @@ -2693,7 +3056,7 @@ static int check_inode_recs(struct btrfs_root *root, /* * We need to record the highest inode number for later 'lost+found' * dir creation. - * We must select a ino not used/refered by any existing inode, or + * We must select an ino not used/referred by any existing inode, or * 'lost+found' ino may be a missing ino in a corrupted leaf, * this may cause 'lost+found' dir has wrong nlinks. */ @@ -2757,6 +3120,7 @@ static int check_inode_recs(struct btrfs_root *root, return err; rec = get_inode_rec(inode_cache, root_dirid, 0); + BUG_ON(IS_ERR(rec)); if (rec) { ret = check_root_dir(rec); if (ret) { @@ -2864,13 +3228,16 @@ static struct root_record *get_root_rec(struct cache_tree *root_cache, rec = container_of(cache, struct root_record, cache); } else { rec = calloc(1, sizeof(*rec)); + if (!rec) + return ERR_PTR(-ENOMEM); rec->objectid = objectid; INIT_LIST_HEAD(&rec->backrefs); rec->cache.start = objectid; rec->cache.size = 1; ret = insert_cache_extent(root_cache, &rec->cache); - BUG_ON(ret); + if (ret) + return ERR_PTR(-EEXIST); } return rec; } @@ -2890,8 +3257,9 @@ static struct root_backref *get_root_backref(struct root_record *rec, return backref; } - backref = malloc(sizeof(*backref) + namelen + 1); - memset(backref, 0, sizeof(*backref)); + backref = calloc(1, sizeof(*backref) + namelen + 1); + if (!backref) + return NULL; backref->ref_root = ref_root; backref->dir = dir; backref->index = index; @@ -2909,8 +3277,7 @@ static void free_root_record(struct cache_extent *cache) rec = container_of(cache, struct root_record, cache); while (!list_empty(&rec->backrefs)) { - backref = list_entry(rec->backrefs.next, - struct root_backref, list); + backref = to_root_backref(rec->backrefs.next); list_del(&backref->list); free(backref); } @@ -2929,7 +3296,9 @@ static int add_root_backref(struct cache_tree *root_cache, struct root_backref *backref; rec = get_root_rec(root_cache, root_id); + BUG_ON(IS_ERR(rec)); backref = get_root_backref(rec, ref_root, dir, index, name, namelen); + BUG_ON(!backref); backref->errors |= errors; @@ -3034,6 +3403,7 @@ static int check_root_refs(struct btrfs_root *root, int errors = 0; rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID); + BUG_ON(IS_ERR(rec)); rec->found_ref = 1; /* fixme: this can not detect circular references */ @@ -3055,6 +3425,7 @@ static int check_root_refs(struct btrfs_root *root, ref_root = get_root_rec(root_cache, backref->ref_root); + BUG_ON(IS_ERR(ref_root)); if (ref_root->found_ref > 0) continue; @@ -3289,6 +3660,7 @@ static int check_fs_root(struct btrfs_root *root, struct orphan_data_extent *orphan; struct orphan_data_extent *tmp; enum btrfs_tree_block_status status; + struct node_refs nrefs; /* * Reuse the corrupt_block cache tree to record corrupted tree block @@ -3301,6 +3673,7 @@ static int check_fs_root(struct btrfs_root *root, if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { rec = get_root_rec(root_cache, root->root_key.objectid); + BUG_ON(IS_ERR(rec)); if (btrfs_root_refs(root_item) > 0) rec->found_root_item = 1; } @@ -3309,6 +3682,7 @@ static int check_fs_root(struct btrfs_root *root, memset(&root_node, 0, sizeof(root_node)); cache_tree_init(&root_node.root_cache); cache_tree_init(&root_node.inode_cache); + memset(&nrefs, 0, sizeof(nrefs)); /* Move the orphan extent record to corresponding inode_record */ list_for_each_entry_safe(orphan, tmp, @@ -3317,6 +3691,7 @@ static int check_fs_root(struct btrfs_root *root, inode = get_inode_rec(&root_node.inode_cache, orphan->objectid, 1); + BUG_ON(IS_ERR(inode)); inode->errors |= I_ERR_FILE_EXTENT_ORPHAN; list_move(&orphan->list, &inode->orphan_extents); } @@ -3357,7 +3732,7 @@ static int check_fs_root(struct btrfs_root *root, } while (1) { - wret = walk_down_tree(root, &path, wc, &level); + wret = walk_down_tree(root, &path, wc, &level, &nrefs); if (wret < 0) ret = wret; if (wret != 0) @@ -3442,6 +3817,11 @@ static int check_fs_roots(struct btrfs_root *root, int ret; int err = 0; + if (ctx.progress_enabled) { + ctx.tp = TASK_FS_ROOTS; + task_start(ctx.info); + } + /* * Just in case we made any changes to the extent tree that weren't * reflected into the free space cache yet. @@ -3518,27 +3898,28 @@ out: if (!cache_tree_empty(&wc.shared)) fprintf(stderr, "warning line %d\n", __LINE__); + task_stop(ctx.info); + return err; } static int all_backpointers_checked(struct extent_record *rec, int print_errs) { - struct list_head *cur = rec->backrefs.next; + struct rb_node *n; struct extent_backref *back; struct tree_backref *tback; struct data_backref *dback; u64 found = 0; int err = 0; - while(cur != &rec->backrefs) { - back = list_entry(cur, struct extent_backref, list); - cur = cur->next; + for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) { + back = rb_node_to_extent_backref(n); if (!back->found_extent_tree) { err = 1; if (!print_errs) goto out; if (back->is_data) { - dback = (struct data_backref *)back; + dback = to_data_backref(back); fprintf(stderr, "Backref %llu %s %llu" " owner %llu offset %llu num_refs %lu" " not found in extent tree\n", @@ -3552,7 +3933,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs) (unsigned long long)dback->offset, (unsigned long)dback->num_refs); } else { - tback = (struct tree_backref *)back; + tback = to_tree_backref(back); fprintf(stderr, "Backref %llu parent %llu" " root %llu not found in extent tree\n", (unsigned long long)rec->start, @@ -3564,7 +3945,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs) err = 1; if (!print_errs) goto out; - tback = (struct tree_backref *)back; + tback = to_tree_backref(back); fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n", (unsigned long long)rec->start, back->full_backref ? "parent" : "root", @@ -3573,7 +3954,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs) (unsigned long long)tback->root, back); } if (back->is_data) { - dback = (struct data_backref *)back; + dback = to_data_backref(back); if (dback->found_ref != dback->num_refs) { err = 1; if (!print_errs) @@ -3617,7 +3998,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs) if (!back->is_data) { found += 1; } else { - dback = (struct data_backref *)back; + dback = to_data_backref(back); found += dback->found_ref; } } @@ -3635,17 +4016,16 @@ out: return err; } -static int free_all_extent_backrefs(struct extent_record *rec) +static void __free_one_backref(struct rb_node *node) { - struct extent_backref *back; - struct list_head *cur; - while (!list_empty(&rec->backrefs)) { - cur = rec->backrefs.next; - back = list_entry(cur, struct extent_backref, list); - list_del(cur); - free(back); - } - return 0; + struct extent_backref *back = rb_node_to_extent_backref(node); + + free(back); +} + +static void free_all_extent_backrefs(struct extent_record *rec) +{ + rb_free_nodes(&rec->backref_tree, __free_one_backref); } static void free_extent_record_cache(struct btrfs_fs_info *fs_info, @@ -3659,7 +4039,6 @@ static void free_extent_record_cache(struct btrfs_fs_info *fs_info, if (!cache) break; rec = container_of(cache, struct extent_record, cache); - btrfs_unpin_extent(fs_info, rec->start, rec->max_size); remove_cache_extent(extent_cache, cache); free_all_extent_backrefs(rec); free(rec); @@ -3671,7 +4050,9 @@ static int maybe_free_extent_rec(struct cache_tree *extent_cache, { if (rec->content_checked && rec->owner_ref_checked && rec->extent_item_refs == rec->refs && rec->refs > 0 && - rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0)) { + rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) && + !rec->bad_full_backref && !rec->crossing_stripes && + !rec->wrong_chunk_type) { remove_cache_extent(extent_cache, &rec->cache); free_all_extent_backrefs(rec); list_del_init(&rec->list); @@ -3684,7 +4065,7 @@ static int check_owner_ref(struct btrfs_root *root, struct extent_record *rec, struct extent_buffer *buf) { - struct extent_backref *node; + struct extent_backref *node, *tmp; struct tree_backref *back; struct btrfs_root *ref_root; struct btrfs_key key; @@ -3694,14 +4075,15 @@ static int check_owner_ref(struct btrfs_root *root, int found = 0; int ret; - list_for_each_entry(node, &rec->backrefs, list) { + rbtree_postorder_for_each_entry_safe(node, tmp, + &rec->backref_tree, node) { if (node->is_data) continue; if (!node->found_ref) continue; if (node->full_backref) continue; - back = (struct tree_backref *)node; + back = to_tree_backref(node); if (btrfs_header_owner(buf) == back->root) return 0; } @@ -3739,18 +4121,16 @@ static int check_owner_ref(struct btrfs_root *root, static int is_extent_tree_record(struct extent_record *rec) { - struct list_head *cur = rec->backrefs.next; - struct extent_backref *node; + struct extent_backref *ref, *tmp; struct tree_backref *back; int is_extent = 0; - while(cur != &rec->backrefs) { - node = list_entry(cur, struct extent_backref, list); - cur = cur->next; - if (node->is_data) + rbtree_postorder_for_each_entry_safe(ref, tmp, + &rec->backref_tree, node) { + if (ref->is_data) return 0; - back = (struct tree_backref *)node; - if (node->full_backref) + back = to_tree_backref(ref); + if (ref->full_backref) return 0; if (back->root == BTRFS_EXTENT_TREE_OBJECTID) is_extent = 1; @@ -3986,11 +4366,11 @@ again: * Attempt to fix basic block failures. If we can't fix it for whatever reason * then just return -EIO. */ -static int try_to_fix_bad_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int try_to_fix_bad_block(struct btrfs_root *root, struct extent_buffer *buf, enum btrfs_tree_block_status status) { + struct btrfs_trans_handle *trans; struct ulist *roots; struct ulist_node *node; struct btrfs_root *search_root; @@ -4007,7 +4387,7 @@ static int try_to_fix_bad_block(struct btrfs_trans_handle *trans, if (!path) return -EIO; - ret = btrfs_find_all_roots(trans, root->fs_info, buf->start, + ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots); if (ret) { btrfs_free_path(path); @@ -4026,7 +4406,12 @@ static int try_to_fix_bad_block(struct btrfs_trans_handle *trans, break; } - record_root_in_trans(trans, search_root); + + trans = btrfs_start_transaction(search_root, 0); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + break; + } path->lowest_level = btrfs_header_level(buf); path->skip_check_block = 1; @@ -4037,23 +4422,26 @@ static int try_to_fix_bad_block(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1); if (ret) { ret = -EIO; + btrfs_commit_transaction(trans, search_root); break; } if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER) ret = fix_key_order(trans, search_root, path); else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS) ret = fix_item_offset(trans, search_root, path); - if (ret) + if (ret) { + btrfs_commit_transaction(trans, search_root); break; + } btrfs_release_path(path); + btrfs_commit_transaction(trans, search_root); } ulist_free(roots); btrfs_free_path(path); return ret; } -static int check_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int check_block(struct btrfs_root *root, struct cache_tree *extent_cache, struct extent_buffer *buf, u64 flags) { @@ -4089,8 +4477,7 @@ static int check_block(struct btrfs_trans_handle *trans, if (status != BTRFS_TREE_BLOCK_CLEAN) { if (repair) - status = try_to_fix_bad_block(trans, root, buf, - status); + status = try_to_fix_bad_block(root, buf, status); if (status != BTRFS_TREE_BLOCK_CLEAN) { ret = -EIO; fprintf(stderr, "bad block %llu\n", @@ -4098,7 +4485,7 @@ static int check_block(struct btrfs_trans_handle *trans, } else { /* * Signal to callers we need to start the scan over - * again since we'll have cow'ed blocks. + * again since we'll have cowed blocks. */ ret = -EAGAIN; } @@ -4117,38 +4504,40 @@ static int check_block(struct btrfs_trans_handle *trans, return ret; } + static struct tree_backref *find_tree_backref(struct extent_record *rec, u64 parent, u64 root) { - struct list_head *cur = rec->backrefs.next; - struct extent_backref *node; - struct tree_backref *back; + struct rb_node *node; + struct tree_backref *back = NULL; + struct tree_backref match = { + .node = { + .is_data = 0, + }, + }; - while(cur != &rec->backrefs) { - node = list_entry(cur, struct extent_backref, list); - cur = cur->next; - if (node->is_data) - continue; - back = (struct tree_backref *)node; - if (parent > 0) { - if (!node->full_backref) - continue; - if (parent == back->parent) - return back; - } else { - if (node->full_backref) - continue; - if (back->root == root) - return back; - } + if (parent) { + match.parent = parent; + match.node.full_backref = 1; + } else { + match.root = root; } - return NULL; + + node = rb_search(&rec->backref_tree, &match.node.node, + (rb_compare_keys)compare_extent_backref, NULL); + if (node) + back = to_tree_backref(rb_node_to_extent_backref(node)); + + return back; } static struct tree_backref *alloc_tree_backref(struct extent_record *rec, u64 parent, u64 root) { struct tree_backref *ref = malloc(sizeof(*ref)); + + if (!ref) + return NULL; memset(&ref->node, 0, sizeof(ref->node)); if (parent > 0) { ref->parent = parent; @@ -4157,7 +4546,7 @@ static struct tree_backref *alloc_tree_backref(struct extent_record *rec, ref->root = root; ref->node.full_backref = 0; } - list_add_tail(&ref->node.list, &rec->backrefs); + rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref); return ref; } @@ -4168,35 +4557,32 @@ static struct data_backref *find_data_backref(struct extent_record *rec, int found_ref, u64 disk_bytenr, u64 bytes) { - struct list_head *cur = rec->backrefs.next; - struct extent_backref *node; - struct data_backref *back; + struct rb_node *node; + struct data_backref *back = NULL; + struct data_backref match = { + .node = { + .is_data = 1, + }, + .owner = owner, + .offset = offset, + .bytes = bytes, + .found_ref = found_ref, + .disk_bytenr = disk_bytenr, + }; - while(cur != &rec->backrefs) { - node = list_entry(cur, struct extent_backref, list); - cur = cur->next; - if (!node->is_data) - continue; - back = (struct data_backref *)node; - if (parent > 0) { - if (!node->full_backref) - continue; - if (parent == back->parent) - return back; - } else { - if (node->full_backref) - continue; - if (back->root == root && back->owner == owner && - back->offset == offset) { - if (found_ref && node->found_ref && - (back->bytes != bytes || - back->disk_bytenr != disk_bytenr)) - continue; - return back; - } - } + if (parent) { + match.parent = parent; + match.node.full_backref = 1; + } else { + match.root = root; } - return NULL; + + node = rb_search(&rec->backref_tree, &match.node.node, + (rb_compare_keys)compare_extent_backref, NULL); + if (node) + back = to_data_backref(rb_node_to_extent_backref(node)); + + return back; } static struct data_backref *alloc_data_backref(struct extent_record *rec, @@ -4205,6 +4591,9 @@ static struct data_backref *alloc_data_backref(struct extent_record *rec, u64 max_size) { struct data_backref *ref = malloc(sizeof(*ref)); + + if (!ref) + return NULL; memset(&ref->node, 0, sizeof(ref->node)); ref->node.is_data = 1; @@ -4222,38 +4611,142 @@ static struct data_backref *alloc_data_backref(struct extent_record *rec, ref->bytes = max_size; ref->found_ref = 0; ref->num_refs = 0; - list_add_tail(&ref->node.list, &rec->backrefs); + rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref); if (max_size > rec->max_size) rec->max_size = max_size; return ref; } -static int add_extent_rec(struct cache_tree *extent_cache, - struct btrfs_key *parent_key, u64 parent_gen, - u64 start, u64 nr, u64 extent_item_refs, - int is_root, int inc_ref, int set_checked, - int metadata, int extent_rec, u64 max_size) +/* Check if the type of extent matches with its chunk */ +static void check_extent_type(struct extent_record *rec) { - struct extent_record *rec; - struct cache_extent *cache; - int ret = 0; - int dup = 0; + struct btrfs_block_group_cache *bg_cache; - cache = lookup_cache_extent(extent_cache, start, nr); - if (cache) { - rec = container_of(cache, struct extent_record, cache); - if (inc_ref) - rec->refs++; - if (rec->nr == 1) - rec->nr = max(nr, max_size); + bg_cache = btrfs_lookup_first_block_group(global_info, rec->start); + if (!bg_cache) + return; - /* - * We need to make sure to reset nr to whatever the extent - * record says was the real size, this way we can compare it to + /* data extent, check chunk directly*/ + if (!rec->metadata) { + if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA)) + rec->wrong_chunk_type = 1; + return; + } + + /* metadata extent, check the obvious case first */ + if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM | + BTRFS_BLOCK_GROUP_METADATA))) { + rec->wrong_chunk_type = 1; + return; + } + + /* + * Check SYSTEM extent, as it's also marked as metadata, we can only + * make sure it's a SYSTEM extent by its backref + */ + if (!RB_EMPTY_ROOT(&rec->backref_tree)) { + struct extent_backref *node; + struct tree_backref *tback; + u64 bg_type; + + node = rb_node_to_extent_backref(rb_first(&rec->backref_tree)); + if (node->is_data) { + /* tree block shouldn't have data backref */ + rec->wrong_chunk_type = 1; + return; + } + tback = container_of(node, struct tree_backref, node); + + if (tback->root == BTRFS_CHUNK_TREE_OBJECTID) + bg_type = BTRFS_BLOCK_GROUP_SYSTEM; + else + bg_type = BTRFS_BLOCK_GROUP_METADATA; + if (!(bg_cache->flags & bg_type)) + rec->wrong_chunk_type = 1; + } +} + +/* + * Allocate a new extent record, fill default values from @tmpl and insert int + * @extent_cache. Caller is supposed to make sure the [start,nr) is not in + * the cache, otherwise it fails. + */ +static int add_extent_rec_nolookup(struct cache_tree *extent_cache, + struct extent_record *tmpl) +{ + struct extent_record *rec; + int ret = 0; + + rec = malloc(sizeof(*rec)); + if (!rec) + return -ENOMEM; + rec->start = tmpl->start; + rec->max_size = tmpl->max_size; + rec->nr = max(tmpl->nr, tmpl->max_size); + rec->found_rec = tmpl->found_rec; + rec->content_checked = tmpl->content_checked; + rec->owner_ref_checked = tmpl->owner_ref_checked; + rec->num_duplicates = 0; + rec->metadata = tmpl->metadata; + rec->flag_block_full_backref = FLAG_UNSET; + rec->bad_full_backref = 0; + rec->crossing_stripes = 0; + rec->wrong_chunk_type = 0; + rec->is_root = tmpl->is_root; + rec->refs = tmpl->refs; + rec->extent_item_refs = tmpl->extent_item_refs; + rec->parent_generation = tmpl->parent_generation; + INIT_LIST_HEAD(&rec->backrefs); + INIT_LIST_HEAD(&rec->dups); + INIT_LIST_HEAD(&rec->list); + rec->backref_tree = RB_ROOT; + memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key)); + rec->cache.start = tmpl->start; + rec->cache.size = tmpl->nr; + ret = insert_cache_extent(extent_cache, &rec->cache); + BUG_ON(ret); + bytes_used += rec->nr; + + if (tmpl->metadata) + rec->crossing_stripes = check_crossing_stripes(rec->start, + global_info->tree_root->nodesize); + check_extent_type(rec); + return ret; +} + +/* + * Lookup and modify an extent, some values of @tmpl are interpreted verbatim, + * some are hints: + * - refs - if found, increase refs + * - is_root - if found, set + * - content_checked - if found, set + * - owner_ref_checked - if found, set + * + * If not found, create a new one, initialize and insert. + */ +static int add_extent_rec(struct cache_tree *extent_cache, + struct extent_record *tmpl) +{ + struct extent_record *rec; + struct cache_extent *cache; + int ret = 0; + int dup = 0; + + cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr); + if (cache) { + rec = container_of(cache, struct extent_record, cache); + if (tmpl->refs) + rec->refs++; + if (rec->nr == 1) + rec->nr = max(tmpl->nr, tmpl->max_size); + + /* + * We need to make sure to reset nr to whatever the extent + * record says was the real size, this way we can compare it to * the backrefs. */ - if (extent_rec) { - if (start != rec->start || rec->found_rec) { + if (tmpl->found_rec) { + if (tmpl->start != rec->start || rec->found_rec) { struct extent_record *tmp; dup = 1; @@ -4270,97 +4763,61 @@ static int add_extent_rec(struct cache_tree *extent_cache, tmp = malloc(sizeof(*tmp)); if (!tmp) return -ENOMEM; - tmp->start = start; - tmp->max_size = max_size; - tmp->nr = nr; + tmp->start = tmpl->start; + tmp->max_size = tmpl->max_size; + tmp->nr = tmpl->nr; tmp->found_rec = 1; - tmp->metadata = metadata; - tmp->extent_item_refs = extent_item_refs; + tmp->metadata = tmpl->metadata; + tmp->extent_item_refs = tmpl->extent_item_refs; INIT_LIST_HEAD(&tmp->list); list_add_tail(&tmp->list, &rec->dups); rec->num_duplicates++; } else { - rec->nr = nr; + rec->nr = tmpl->nr; rec->found_rec = 1; } } - if (extent_item_refs && !dup) { + if (tmpl->extent_item_refs && !dup) { if (rec->extent_item_refs) { fprintf(stderr, "block %llu rec " "extent_item_refs %llu, passed %llu\n", - (unsigned long long)start, + (unsigned long long)tmpl->start, (unsigned long long) rec->extent_item_refs, - (unsigned long long)extent_item_refs); + (unsigned long long)tmpl->extent_item_refs); } - rec->extent_item_refs = extent_item_refs; + rec->extent_item_refs = tmpl->extent_item_refs; } - if (is_root) + if (tmpl->is_root) rec->is_root = 1; - if (set_checked) { + if (tmpl->content_checked) rec->content_checked = 1; + if (tmpl->owner_ref_checked) rec->owner_ref_checked = 1; - } - - if (parent_key) - btrfs_cpu_key_to_disk(&rec->parent_key, parent_key); - if (parent_gen) - rec->parent_generation = parent_gen; - - if (rec->max_size < max_size) - rec->max_size = max_size; + memcpy(&rec->parent_key, &tmpl->parent_key, + sizeof(tmpl->parent_key)); + if (tmpl->parent_generation) + rec->parent_generation = tmpl->parent_generation; + if (rec->max_size < tmpl->max_size) + rec->max_size = tmpl->max_size; + /* + * A metadata extent can't cross stripe_len boundary, otherwise + * kernel scrub won't be able to handle it. + * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check + * it. + */ + if (tmpl->metadata) + rec->crossing_stripes = check_crossing_stripes( + rec->start, global_info->tree_root->nodesize); + check_extent_type(rec); maybe_free_extent_rec(extent_cache, rec); return ret; } - rec = malloc(sizeof(*rec)); - rec->start = start; - rec->max_size = max_size; - rec->nr = max(nr, max_size); - rec->found_rec = !!extent_rec; - rec->content_checked = 0; - rec->owner_ref_checked = 0; - rec->num_duplicates = 0; - rec->metadata = metadata; - INIT_LIST_HEAD(&rec->backrefs); - INIT_LIST_HEAD(&rec->dups); - INIT_LIST_HEAD(&rec->list); - - if (is_root) - rec->is_root = 1; - else - rec->is_root = 0; - - if (inc_ref) - rec->refs = 1; - else - rec->refs = 0; - - if (extent_item_refs) - rec->extent_item_refs = extent_item_refs; - else - rec->extent_item_refs = 0; - if (parent_key) - btrfs_cpu_key_to_disk(&rec->parent_key, parent_key); - else - memset(&rec->parent_key, 0, sizeof(*parent_key)); - - if (parent_gen) - rec->parent_generation = parent_gen; - else - rec->parent_generation = 0; + ret = add_extent_rec_nolookup(extent_cache, tmpl); - rec->cache.start = start; - rec->cache.size = nr; - ret = insert_cache_extent(extent_cache, &rec->cache); - BUG_ON(ret); - bytes_used += nr; - if (set_checked) { - rec->content_checked = 1; - rec->owner_ref_checked = 1; - } return ret; } @@ -4373,8 +4830,15 @@ static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr, cache = lookup_cache_extent(extent_cache, bytenr, 1); if (!cache) { - add_extent_rec(extent_cache, NULL, 0, bytenr, - 1, 0, 0, 0, 0, 1, 0, 0); + struct extent_record tmpl; + + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.start = bytenr; + tmpl.nr = 1; + tmpl.metadata = 1; + + add_extent_rec_nolookup(extent_cache, &tmpl); + cache = lookup_cache_extent(extent_cache, bytenr, 1); if (!cache) abort(); @@ -4386,8 +4850,10 @@ static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr, } back = find_tree_backref(rec, parent, root); - if (!back) + if (!back) { back = alloc_tree_backref(rec, parent, root); + BUG_ON(!back); + } if (found_ref) { if (back->node.found_ref) { @@ -4408,6 +4874,7 @@ static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr, } back->node.found_extent_tree = 1; } + check_extent_type(rec); maybe_free_extent_rec(extent_cache, rec); return 0; } @@ -4422,8 +4889,15 @@ static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr, cache = lookup_cache_extent(extent_cache, bytenr, 1); if (!cache) { - add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0, - 0, 0, max_size); + struct extent_record tmpl; + + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.start = bytenr; + tmpl.nr = 1; + tmpl.max_size = max_size; + + add_extent_rec_nolookup(extent_cache, &tmpl); + cache = lookup_cache_extent(extent_cache, bytenr, 1); if (!cache) abort(); @@ -4445,9 +4919,11 @@ static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr, */ back = find_data_backref(rec, parent, root, owner, offset, found_ref, bytenr, max_size); - if (!back) + if (!back) { back = alloc_data_backref(rec, parent, root, owner, offset, max_size); + BUG_ON(!back); + } if (found_ref) { BUG_ON(num_refs != 1); @@ -4677,14 +5153,12 @@ struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf, ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); num_stripes = btrfs_chunk_num_stripes(leaf, ptr); - rec = malloc(btrfs_chunk_record_size(num_stripes)); + rec = calloc(1, btrfs_chunk_record_size(num_stripes)); if (!rec) { fprintf(stderr, "memory allocation failed\n"); exit(-1); } - memset(rec, 0, btrfs_chunk_record_size(num_stripes)); - INIT_LIST_HEAD(&rec->list); INIT_LIST_HEAD(&rec->dextents); rec->bg_rec = NULL; @@ -4782,12 +5256,11 @@ btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key, struct btrfs_block_group_item *ptr; struct block_group_record *rec; - rec = malloc(sizeof(*rec)); + rec = calloc(1, sizeof(*rec)); if (!rec) { fprintf(stderr, "memory allocation failed\n"); exit(-1); } - memset(rec, 0, sizeof(*rec)); rec->cache.start = key->objectid; rec->cache.size = key->offset; @@ -4831,12 +5304,11 @@ btrfs_new_device_extent_record(struct extent_buffer *leaf, struct device_extent_record *rec; struct btrfs_dev_extent *ptr; - rec = malloc(sizeof(*rec)); + rec = calloc(1, sizeof(*rec)); if (!rec) { fprintf(stderr, "memory allocation failed\n"); exit(-1); } - memset(rec, 0, sizeof(*rec)); rec->cache.objectid = key->objectid; rec->cache.start = key->offset; @@ -4890,6 +5362,7 @@ static int process_extent_item(struct btrfs_root *root, struct btrfs_extent_data_ref *dref; struct btrfs_shared_data_ref *sref; struct btrfs_key key; + struct extent_record tmpl; unsigned long end; unsigned long ptr; int type; @@ -4903,7 +5376,7 @@ static int process_extent_item(struct btrfs_root *root, if (key.type == BTRFS_METADATA_ITEM_KEY) { metadata = 1; - num_bytes = root->leafsize; + num_bytes = root->nodesize; } else { num_bytes = key.offset; } @@ -4917,16 +5390,32 @@ static int process_extent_item(struct btrfs_root *root, #else BUG(); #endif - return add_extent_rec(extent_cache, NULL, 0, key.objectid, - num_bytes, refs, 0, 0, 0, metadata, 1, - num_bytes); + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.start = key.objectid; + tmpl.nr = num_bytes; + tmpl.extent_item_refs = refs; + tmpl.metadata = metadata; + tmpl.found_rec = 1; + tmpl.max_size = num_bytes; + + return add_extent_rec(extent_cache, &tmpl); } ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item); refs = btrfs_extent_refs(eb, ei); + if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK) + metadata = 1; + else + metadata = 0; - add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes, - refs, 0, 0, 0, metadata, 1, num_bytes); + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.start = key.objectid; + tmpl.nr = num_bytes; + tmpl.extent_item_refs = refs; + tmpl.metadata = metadata; + tmpl.found_rec = 1; + tmpl.max_size = num_bytes; + add_extent_rec(extent_cache, &tmpl); ptr = (unsigned long)(ei + 1); if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK && @@ -5119,7 +5608,7 @@ static int verify_space_cache(struct btrfs_root *root, if (key.type == BTRFS_EXTENT_ITEM_KEY) last = key.objectid + key.offset; else - last = key.objectid + root->leafsize; + last = key.objectid + root->nodesize; path->slots[0]++; continue; } @@ -5131,7 +5620,7 @@ static int verify_space_cache(struct btrfs_root *root, if (key.type == BTRFS_EXTENT_ITEM_KEY) last = key.objectid + key.offset; else - last = key.objectid + root->leafsize; + last = key.objectid + root->nodesize; path->slots[0]++; } @@ -5168,6 +5657,11 @@ static int check_space_cache(struct btrfs_root *root) return 0; } + if (ctx.progress_enabled) { + ctx.tp = TASK_FREE_SPACE; + task_start(ctx.info); + } + while (1) { cache = btrfs_lookup_first_block_group(root->fs_info, start); if (!cache) @@ -5184,53 +5678,41 @@ static int check_space_cache(struct btrfs_root *root) btrfs_remove_free_space_cache(cache); } - ret = load_free_space_cache(root->fs_info, cache); - if (!ret) - continue; + if (btrfs_fs_compat_ro(root->fs_info, + BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) { + ret = exclude_super_stripes(root, cache); + if (ret) { + fprintf(stderr, "could not exclude super stripes: %s\n", + strerror(-ret)); + error++; + continue; + } + ret = load_free_space_tree(root->fs_info, cache); + free_excluded_extents(root, cache); + if (ret < 0) { + fprintf(stderr, "could not load free space tree: %s\n", + strerror(-ret)); + error++; + continue; + } + error += ret; + } else { + ret = load_free_space_cache(root->fs_info, cache); + if (!ret) + continue; + } ret = verify_space_cache(root, cache); if (ret) { - fprintf(stderr, "cache appears valid but isnt %Lu\n", + fprintf(stderr, "cache appears valid but isn't %Lu\n", cache->key.objectid); error++; } } - return error ? -EINVAL : 0; -} - -static int read_extent_data(struct btrfs_root *root, char *data, - u64 logical, u64 *len, int mirror) -{ - u64 offset = 0; - struct btrfs_multi_bio *multi = NULL; - struct btrfs_fs_info *info = root->fs_info; - struct btrfs_device *device; - int ret = 0; - u64 max_len = *len; - - ret = btrfs_map_block(&info->mapping_tree, READ, logical, len, - &multi, mirror, NULL); - if (ret) { - fprintf(stderr, "Couldn't map the block %llu\n", - logical + offset); - goto err; - } - device = multi->stripes[0].dev; - - if (device->fd == 0) - goto err; - if (*len > max_len) - *len = max_len; + task_stop(ctx.info); - ret = pread64(device->fd, data, *len, multi->stripes[0].physical); - if (ret != *len) - ret = -EIO; - else - ret = 0; -err: - kfree(multi); - return ret; + return error ? -EINVAL : 0; } static int check_extent_csums(struct btrfs_root *root, u64 bytenr, @@ -5312,7 +5794,7 @@ static int check_extent_exists(struct btrfs_root *root, u64 bytenr, path = btrfs_alloc_path(); if (!path) { - fprintf(stderr, "Error allocing path\n"); + fprintf(stderr, "Error allocating path\n"); return -ENOMEM; } @@ -5345,7 +5827,7 @@ again: /* * Block group items come before extent items if they have the same - * bytenr, so walk back one more just in case. Dear future traveler, + * bytenr, so walk back one more just in case. Dear future traveller, * first congrats on mastering time travel. Now if it's not too much * trouble could you go back to 2006 and tell Chris to make the * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the @@ -5548,27 +6030,41 @@ static int is_dropped_key(struct btrfs_key *key, return 0; } +/* + * Here are the rules for FULL_BACKREF. + * + * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set. + * 2) If btrfs_header_owner(buf) no longer points to buf then we have + * FULL_BACKREF set. + * 3) We cowed the block walking down a reloc tree. This is impossible to tell + * if it happened after the relocation occurred since we'll have dropped the + * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and + * have no real way to know for sure. + * + * We process the blocks one root at a time, and we start from the lowest root + * objectid and go to the highest. So we can just lookup the owner backref for + * the record and if we don't find it then we know it doesn't exist and we have + * a FULL BACKREF. + * + * FIXME: if we ever start reclaiming root objectid's then we need to fix this + * assumption and simply indicate that we _think_ that the FULL BACKREF needs to + * be set or not and then we can check later once we've gathered all the refs. + */ static int calc_extent_flag(struct btrfs_root *root, struct cache_tree *extent_cache, struct extent_buffer *buf, struct root_item_record *ri, u64 *flags) { - int i; - int nritems = btrfs_header_nritems(buf); - struct btrfs_key key; struct extent_record *rec; struct cache_extent *cache; - struct data_backref *dback; struct tree_backref *tback; - struct extent_buffer *new_buf; u64 owner = 0; - u64 bytenr; - u64 offset; - u64 ptr; - int size; - int ret; - u8 level; + + cache = lookup_cache_extent(extent_cache, buf->start, 1); + /* we have added this extent before */ + BUG_ON(!cache); + rec = container_of(cache, struct extent_record, cache); /* * Except file/reloc tree, we can not have @@ -5581,96 +6077,32 @@ static int calc_extent_flag(struct btrfs_root *root, */ if (buf->start == ri->bytenr) goto normal; - if (btrfs_is_leaf(buf)) { - /* - * we are searching from original root, world - * peace is achieved, we use normal backref. - */ - owner = btrfs_header_owner(buf); - if (owner == ri->objectid) - goto normal; - /* - * we check every eb here, and if any of - * eb dosen't have original root refers - * to this eb, we set full backref flag for - * this extent, otherwise normal backref. - */ - for (i = 0; i < nritems; i++) { - struct btrfs_file_extent_item *fi; - btrfs_item_key_to_cpu(buf, &key, i); - if (key.type != BTRFS_EXTENT_DATA_KEY) - continue; - fi = btrfs_item_ptr(buf, i, - struct btrfs_file_extent_item); - if (btrfs_file_extent_type(buf, fi) == - BTRFS_FILE_EXTENT_INLINE) - continue; - if (btrfs_file_extent_disk_bytenr(buf, fi) == 0) - continue; - bytenr = btrfs_file_extent_disk_bytenr(buf, fi); - cache = lookup_cache_extent(extent_cache, bytenr, 1); - if (!cache) - goto full_backref; - offset = btrfs_file_extent_offset(buf, fi); - rec = container_of(cache, struct extent_record, cache); - dback = find_data_backref(rec, 0, ri->objectid, owner, - key.offset - offset, 1, bytenr, bytenr); - if (!dback) - goto full_backref; - } + if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) goto full_backref; - } else { - level = btrfs_header_level(buf); - for (i = 0; i < nritems; i++) { - ptr = btrfs_node_blockptr(buf, i); - size = btrfs_level_size(root, level); - if (i == 0) { - new_buf = read_tree_block(root, ptr, size, 0); - if (!extent_buffer_uptodate(new_buf)) { - free_extent_buffer(new_buf); - ret = -EIO; - return ret; - } - /* - * we are searching from origin root, world - * peace is achieved, we use normal backref. - */ - owner = btrfs_header_owner(new_buf); - free_extent_buffer(new_buf); - if (owner == ri->objectid) - goto normal; - } - cache = lookup_cache_extent(extent_cache, ptr, size); - if (!cache) - goto full_backref; - rec = container_of(cache, struct extent_record, cache); - tback = find_tree_backref(rec, 0, owner); - if (!tback) - goto full_backref; - } - } + owner = btrfs_header_owner(buf); + if (owner == ri->objectid) + goto normal; + + tback = find_tree_backref(rec, 0, owner); + if (!tback) + goto full_backref; normal: *flags = 0; - cache = lookup_cache_extent(extent_cache, buf->start, 1); - /* we have added this extent before */ - BUG_ON(!cache); - rec = container_of(cache, struct extent_record, cache); - rec->flag_block_full_backref = 0; + if (rec->flag_block_full_backref != FLAG_UNSET && + rec->flag_block_full_backref != 0) + rec->bad_full_backref = 1; return 0; full_backref: *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; - cache = lookup_cache_extent(extent_cache, buf->start, 1); - /* we have added this extent before */ - BUG_ON(!cache); - rec = container_of(cache, struct extent_record, cache); - rec->flag_block_full_backref = 1; + if (rec->flag_block_full_backref != FLAG_UNSET && + rec->flag_block_full_backref != 1) + rec->bad_full_backref = 1; return 0; } -static int run_next_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int run_next_block(struct btrfs_root *root, struct block_info *bits, int bits_nr, u64 *last, @@ -5686,6 +6118,7 @@ static int run_next_block(struct btrfs_trans_handle *trans, struct root_item_record *ri) { struct extent_buffer *buf; + struct extent_record *rec = NULL; u64 bytenr; u32 size; u64 parent; @@ -5738,8 +6171,6 @@ static int run_next_block(struct btrfs_trans_handle *trans, } cache = lookup_cache_extent(extent_cache, bytenr, size); if (cache) { - struct extent_record *rec; - rec = container_of(cache, struct extent_record, cache); gen = rec->parent_generation; } @@ -5754,32 +6185,64 @@ static int run_next_block(struct btrfs_trans_handle *trans, nritems = btrfs_header_nritems(buf); - /* - * FIXME, this only works only if we don't have any full - * backref mode. - */ + flags = 0; if (!init_extent_tree) { ret = btrfs_lookup_extent_info(NULL, root, bytenr, btrfs_header_level(buf), 1, NULL, &flags); - if (ret < 0) - goto out; + if (ret < 0) { + ret = calc_extent_flag(root, extent_cache, buf, ri, &flags); + if (ret < 0) { + fprintf(stderr, "Couldn't calc extent flags\n"); + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + } + } } else { flags = 0; ret = calc_extent_flag(root, extent_cache, buf, ri, &flags); - if (ret < 0) - goto out; + if (ret < 0) { + fprintf(stderr, "Couldn't calc extent flags\n"); + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + } + } + + if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { + if (ri != NULL && + ri->objectid != BTRFS_TREE_RELOC_OBJECTID && + ri->objectid == btrfs_header_owner(buf)) { + /* + * Ok we got to this block from it's original owner and + * we have FULL_BACKREF set. Relocation can leave + * converted blocks over so this is altogether possible, + * however it's not possible if the generation > the + * last snapshot, so check for this case. + */ + if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) && + btrfs_header_generation(buf) > ri->last_snapshot) { + flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF; + rec->bad_full_backref = 1; + } + } + } else { + if (ri != NULL && + (ri->objectid == BTRFS_TREE_RELOC_OBJECTID || + btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) { + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + rec->bad_full_backref = 1; + } } if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { + rec->flag_block_full_backref = 1; parent = bytenr; owner = 0; } else { + rec->flag_block_full_backref = 0; parent = 0; owner = btrfs_header_owner(buf); } - ret = check_block(trans, root, extent_cache, buf, flags); + ret = check_block(root, extent_cache, buf, flags); if (ret) goto out; @@ -5915,8 +6378,10 @@ static int run_next_block(struct btrfs_trans_handle *trans, btrfs_item_key_to_cpu(buf, &first_key, 0); level = btrfs_header_level(buf); for (i = 0; i < nritems; i++) { + struct extent_record tmpl; + ptr = btrfs_node_blockptr(buf, i); - size = btrfs_level_size(root, level - 1); + size = root->nodesize; btrfs_node_key_to_cpu(buf, &key, i); if (ri != NULL) { if ((level == ri->drop_level) @@ -5924,10 +6389,16 @@ static int run_next_block(struct btrfs_trans_handle *trans, continue; } } - ret = add_extent_rec(extent_cache, &key, - btrfs_node_ptr_generation(buf, i), - ptr, size, 0, 0, 1, 0, 1, 0, - size); + + memset(&tmpl, 0, sizeof(tmpl)); + btrfs_cpu_key_to_disk(&tmpl.parent_key, &key); + tmpl.parent_generation = btrfs_node_ptr_generation(buf, i); + tmpl.start = ptr; + tmpl.nr = size; + tmpl.refs = 1; + tmpl.metadata = 1; + tmpl.max_size = size; + ret = add_extent_rec(extent_cache, &tmpl); BUG_ON(ret); add_tree_backref(extent_cache, ptr, parent, owner, 1); @@ -5963,12 +6434,21 @@ static int add_root_to_pending(struct extent_buffer *buf, struct cache_tree *nodes, u64 objectid) { + struct extent_record tmpl; + if (btrfs_header_level(buf) > 0) add_pending(nodes, seen, buf->start, buf->len); else add_pending(pending, seen, buf->start, buf->len); - add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len, - 0, 1, 1, 0, 1, 0, buf->len); + + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.start = buf->start; + tmpl.nr = buf->len; + tmpl.is_root = 1; + tmpl.refs = 1; + tmpl.metadata = 1; + tmpl.max_size = buf->len; + add_extent_rec(extent_cache, &tmpl); if (objectid == BTRFS_TREE_RELOC_OBJECTID || btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) @@ -6022,7 +6502,7 @@ static int free_extent_hook(struct btrfs_trans_handle *trans, back->node.found_extent_tree = 0; if (!back->node.found_extent_tree && back->node.found_ref) { - list_del(&back->node.list); + rb_erase(&back->node.node, &rec->backref_tree); free(back); } } else { @@ -6041,7 +6521,7 @@ static int free_extent_hook(struct btrfs_trans_handle *trans, back->node.found_extent_tree = 0; } if (!back->node.found_extent_tree && back->node.found_ref) { - list_del(&back->node.list); + rb_erase(&back->node.node, &rec->backref_tree); free(back); } } @@ -6117,7 +6597,7 @@ static int delete_extent_records(struct btrfs_trans_handle *trans, if (found_key.type == BTRFS_EXTENT_ITEM_KEY || found_key.type == BTRFS_METADATA_ITEM_KEY) { u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ? - found_key.offset : root->leafsize; + found_key.offset : root->nodesize; ret = btrfs_update_block_group(trans, root, bytenr, bytes, 0, 0); @@ -6152,7 +6632,7 @@ static int record_extent(struct btrfs_trans_handle *trans, if (!back->is_data) rec->max_size = max_t(u64, rec->max_size, - info->extent_root->leafsize); + info->extent_root->nodesize); if (!allocated) { u32 item_size = sizeof(*ei); @@ -6182,7 +6662,7 @@ static int record_extent(struct btrfs_trans_handle *trans, } else { struct btrfs_disk_key copy_key;; - tback = (struct tree_backref *)back; + tback = to_tree_backref(back); bi = (struct btrfs_tree_block_info *)(ei + 1); memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi)); @@ -6211,7 +6691,7 @@ static int record_extent(struct btrfs_trans_handle *trans, u64 parent; int i; - dback = (struct data_backref *)back; + dback = to_data_backref(back); if (back->full_backref) parent = dback->parent; else @@ -6249,7 +6729,7 @@ static int record_extent(struct btrfs_trans_handle *trans, } else { u64 parent; - tback = (struct tree_backref *)back; + tback = to_tree_backref(back); if (back->full_backref) parent = tback->parent; else @@ -6260,23 +6740,13 @@ static int record_extent(struct btrfs_trans_handle *trans, parent, tback->root, 0, 0); fprintf(stderr, "adding new tree backref on " "start %llu len %llu parent %llu root %llu\n", - rec->start, rec->max_size, tback->parent, tback->root); + rec->start, rec->max_size, parent, tback->root); } - if (ret) - goto fail; fail: btrfs_release_path(path); return ret; } -struct extent_entry { - u64 bytenr; - u64 bytes; - int count; - int broken; - struct list_head list; -}; - static struct extent_entry *find_entry(struct list_head *entries, u64 bytenr, u64 bytes) { @@ -6331,16 +6801,16 @@ static struct extent_entry *find_most_right_entry(struct list_head *entries) return best; } -static int repair_ref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info, struct btrfs_path *path, +static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path, struct data_backref *dback, struct extent_entry *entry) { + struct btrfs_trans_handle *trans; struct btrfs_root *root; struct btrfs_file_extent_item *fi; struct extent_buffer *leaf; struct btrfs_key key; u64 bytenr, bytes; - int ret; + int ret, err; key.objectid = dback->root; key.type = BTRFS_ROOT_ITEM_KEY; @@ -6392,11 +6862,9 @@ static int repair_ref(struct btrfs_trans_handle *trans, btrfs_release_path(path); - /* - * Have to make sure that this root gets updated when we commit the - * transaction - */ - record_root_in_trans(trans, root); + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); /* * Ok we have the key of the file extent we want to fix, now we can cow @@ -6406,13 +6874,14 @@ static int repair_ref(struct btrfs_trans_handle *trans, if (ret < 0) { fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n", key.objectid, key.type, key.offset, ret); - return ret; + goto out; } if (ret > 0) { fprintf(stderr, "Well that's odd, we just found this key " "[%Lu, %u, %Lu]\n", key.objectid, key.type, key.offset); - return -EINVAL; + ret = -EINVAL; + goto out; } leaf = path->nodes[0]; fi = btrfs_item_ptr(leaf, path->slots[0], @@ -6425,7 +6894,8 @@ static int repair_ref(struct btrfs_trans_handle *trans, "system and send it to a btrfs developer so they can " "complete this functionality for bytenr %Lu\n", dback->disk_bytenr); - return -EINVAL; + ret = -EINVAL; + goto out; } if (dback->node.broken && dback->disk_bytenr != entry->bytenr) { @@ -6442,7 +6912,8 @@ static int repair_ref(struct btrfs_trans_handle *trans, "take a btrfs-image of this file system and " "send it to a btrfs developer, ref %Lu\n", dback->disk_bytenr); - return -EINVAL; + ret = -EINVAL; + goto out; } offset += off_diff; btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr); @@ -6456,7 +6927,8 @@ static int repair_ref(struct btrfs_trans_handle *trans, " take a btrfs-image of this file system and " "send it to a btrfs developer, ref %Lu\n", dback->disk_bytenr); - return -EINVAL; + ret = -EINVAL; + goto out; } offset += dback->disk_bytenr; @@ -6477,15 +6949,16 @@ static int repair_ref(struct btrfs_trans_handle *trans, else printf("ram bytes may be wrong?\n"); btrfs_mark_buffer_dirty(leaf); +out: + err = btrfs_commit_transaction(trans, root); btrfs_release_path(path); - return 0; + return ret ? ret : err; } -static int verify_backrefs(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info, struct btrfs_path *path, +static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path, struct extent_record *rec) { - struct extent_backref *back; + struct extent_backref *back, *tmp; struct data_backref *dback; struct extent_entry *entry, *best = NULL; LIST_HEAD(entries); @@ -6501,11 +6974,12 @@ static int verify_backrefs(struct btrfs_trans_handle *trans, if (rec->metadata) return 0; - list_for_each_entry(back, &rec->backrefs, list) { + rbtree_postorder_for_each_entry_safe(back, tmp, + &rec->backref_tree, node) { if (back->full_backref || !back->is_data) continue; - dback = (struct data_backref *)back; + dback = to_data_backref(back); /* * We only pay attention to backrefs that we found a real @@ -6627,11 +7101,12 @@ static int verify_backrefs(struct btrfs_trans_handle *trans, * Ok great we all agreed on an extent record, let's go find the real * references and fix up the ones that don't match. */ - list_for_each_entry(back, &rec->backrefs, list) { + rbtree_postorder_for_each_entry_safe(back, tmp, + &rec->backref_tree, node) { if (back->full_backref || !back->is_data) continue; - dback = (struct data_backref *)back; + dback = to_data_backref(back); /* * Still ignoring backrefs that don't have a real ref attached @@ -6644,7 +7119,7 @@ static int verify_backrefs(struct btrfs_trans_handle *trans, dback->disk_bytenr == best->bytenr) continue; - ret = repair_ref(trans, info, path, dback, best); + ret = repair_ref(info, path, dback, best); if (ret) goto out; } @@ -6695,7 +7170,7 @@ static int process_duplicates(struct btrfs_root *root, */ remove_cache_extent(extent_cache, &rec->cache); - good = list_entry(rec->dups.next, struct extent_record, list); + good = to_extent_record(rec->dups.next); list_del_init(&good->list); INIT_LIST_HEAD(&good->backrefs); INIT_LIST_HEAD(&good->dups); @@ -6745,15 +7220,15 @@ static int process_duplicates(struct btrfs_root *root, return good->num_duplicates ? 0 : 1; } -static int delete_duplicate_records(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int delete_duplicate_records(struct btrfs_root *root, struct extent_record *rec) { + struct btrfs_trans_handle *trans; LIST_HEAD(delete_list); struct btrfs_path *path; struct extent_record *tmp, *good, *n; int nr_del = 0; - int ret = 0; + int ret = 0, err; struct btrfs_key key; path = btrfs_alloc_path(); @@ -6772,7 +7247,7 @@ static int delete_duplicate_records(struct btrfs_trans_handle *trans, if (tmp->start + tmp->nr < good->start + good->nr) { fprintf(stderr, "Ok we have overlapping extents that " - "aren't completely covered by eachother, this " + "aren't completely covered by each other, this " "is going to require more careful thought. " "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n", tmp->start, tmp->nr, good->start, good->nr); @@ -6791,6 +7266,12 @@ static int delete_duplicate_records(struct btrfs_trans_handle *trans, } root = root->fs_info->extent_root; + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + list_for_each_entry(tmp, &delete_list, list) { if (tmp->found_rec == 0) continue; @@ -6810,18 +7291,20 @@ static int delete_duplicate_records(struct btrfs_trans_handle *trans, if (ret) { if (ret > 0) ret = -EINVAL; - goto out; + break; } ret = btrfs_del_item(trans, root, path); if (ret) - goto out; + break; btrfs_release_path(path); nr_del++; } - + err = btrfs_commit_transaction(trans, root); + if (err && !ret) + ret = err; out: while (!list_empty(&delete_list)) { - tmp = list_entry(delete_list.next, struct extent_record, list); + tmp = to_extent_record(delete_list.next); list_del_init(&tmp->list); if (tmp == rec) continue; @@ -6829,7 +7312,7 @@ out: } while (!list_empty(&rec->dups)) { - tmp = list_entry(rec->dups.next, struct extent_record, list); + tmp = to_extent_record(rec->dups.next); list_del_init(&tmp->list); free(tmp); } @@ -6842,14 +7325,13 @@ out: return ret ? ret : nr_del; } -static int find_possible_backrefs(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info, +static int find_possible_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path, struct cache_tree *extent_cache, struct extent_record *rec) { struct btrfs_root *root; - struct extent_backref *back; + struct extent_backref *back, *tmp; struct data_backref *dback; struct cache_extent *cache; struct btrfs_file_extent_item *fi; @@ -6857,12 +7339,13 @@ static int find_possible_backrefs(struct btrfs_trans_handle *trans, u64 bytenr, bytes; int ret; - list_for_each_entry(back, &rec->backrefs, list) { + rbtree_postorder_for_each_entry_safe(back, tmp, + &rec->backref_tree, node) { /* Don't care about full backrefs (poor unloved backrefs) */ if (back->full_backref || !back->is_data) continue; - dback = (struct data_backref *)back; + dback = to_data_backref(back); /* We found this one, we don't need to do a lookup */ if (dback->found_ref) @@ -6945,7 +7428,7 @@ static int record_orphan_data_extents(struct btrfs_fs_info *fs_info, { struct btrfs_key key; struct btrfs_root *dest_root; - struct extent_backref *back; + struct extent_backref *back, *tmp; struct data_backref *dback; struct orphan_data_extent *orphan; struct btrfs_path *path; @@ -6957,11 +7440,12 @@ static int record_orphan_data_extents(struct btrfs_fs_info *fs_info, path = btrfs_alloc_path(); if (!path) return -ENOMEM; - list_for_each_entry(back, &rec->backrefs, list) { + rbtree_postorder_for_each_entry_safe(back, tmp, + &rec->backref_tree, node) { if (back->full_backref || !back->is_data || !back->found_extent_tree) continue; - dback = (struct data_backref *)back; + dback = to_data_backref(back); if (dback->found_ref) continue; key.objectid = dback->root; @@ -7017,34 +7501,20 @@ out: * all of the existing entries for it and recreate them * based on what the tree scan found. */ -static int fixup_extent_refs(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info, +static int fixup_extent_refs(struct btrfs_fs_info *info, struct cache_tree *extent_cache, struct extent_record *rec) { + struct btrfs_trans_handle *trans = NULL; int ret; struct btrfs_path *path; - struct list_head *cur = rec->backrefs.next; struct cache_extent *cache; - struct extent_backref *back; + struct extent_backref *back, *tmp; int allocated = 0; u64 flags = 0; - /* - * remember our flags for recreating the extent. - * FIXME, if we have cleared extent tree, we can not - * lookup extent info in extent tree. - */ - if (!init_extent_tree) { - ret = btrfs_lookup_extent_info(NULL, info->extent_root, - rec->start, rec->max_size, - rec->metadata, NULL, &flags); - if (ret < 0) - return ret; - } else { - if (rec->flag_block_full_backref) - flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; - } + if (rec->flag_block_full_backref) + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; path = btrfs_alloc_path(); if (!path) @@ -7058,17 +7528,22 @@ static int fixup_extent_refs(struct btrfs_trans_handle *trans, * them into the list if we find the backref so that * verify_backrefs can figure out what to do. */ - ret = find_possible_backrefs(trans, info, path, extent_cache, - rec); + ret = find_possible_backrefs(info, path, extent_cache, rec); if (ret < 0) goto out; } /* step one, make sure all of the backrefs agree */ - ret = verify_backrefs(trans, info, path, rec); + ret = verify_backrefs(info, path, rec); if (ret < 0) goto out; + trans = btrfs_start_transaction(info->extent_root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + /* step two, delete all the existing records */ ret = delete_extent_records(trans, info->extent_root, path, rec->start, rec->max_size); @@ -7085,10 +7560,8 @@ static int fixup_extent_refs(struct btrfs_trans_handle *trans, } /* step three, recreate all the refs we did find */ - while(cur != &rec->backrefs) { - back = list_entry(cur, struct extent_backref, list); - cur = cur->next; - + rbtree_postorder_for_each_entry_safe(back, tmp, + &rec->backref_tree, node) { /* * if we didn't find any references, don't create a * new extent record @@ -7096,6 +7569,7 @@ static int fixup_extent_refs(struct btrfs_trans_handle *trans, if (!back->found_ref) continue; + rec->bad_full_backref = 0; ret = record_extent(trans, info, path, rec, back, allocated, flags); allocated = 1; @@ -7103,10 +7577,77 @@ static int fixup_extent_refs(struct btrfs_trans_handle *trans, goto out; } out: + if (trans) { + int err = btrfs_commit_transaction(trans, info->extent_root); + if (!ret) + ret = err; + } + btrfs_free_path(path); return ret; } +static int fixup_extent_flags(struct btrfs_fs_info *fs_info, + struct extent_record *rec) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = fs_info->extent_root; + struct btrfs_path *path; + struct btrfs_extent_item *ei; + struct btrfs_key key; + u64 flags; + int ret = 0; + + key.objectid = rec->start; + if (rec->metadata) { + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = rec->info_level; + } else { + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = rec->max_size; + } + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + btrfs_free_path(path); + return PTR_ERR(trans); + } + + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret < 0) { + btrfs_free_path(path); + btrfs_commit_transaction(trans, root); + return ret; + } else if (ret) { + fprintf(stderr, "Didn't find extent for %llu\n", + (unsigned long long)rec->start); + btrfs_free_path(path); + btrfs_commit_transaction(trans, root); + return -ENOENT; + } + + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_extent_item); + flags = btrfs_extent_flags(path->nodes[0], ei); + if (rec->flag_block_full_backref) { + fprintf(stderr, "setting full backref on %llu\n", + (unsigned long long)key.objectid); + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + } else { + fprintf(stderr, "clearing full backref on %llu\n", + (unsigned long long)key.objectid); + flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF; + } + btrfs_set_extent_flags(path->nodes[0], ei, flags); + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_free_path(path); + return btrfs_commit_transaction(trans, root); +} + /* right now we only prune from the extent allocation tree */ static int prune_one_block(struct btrfs_trans_handle *trans, struct btrfs_fs_info *info, @@ -7176,20 +7717,27 @@ out: return ret; } -static int prune_corrupt_blocks(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info) +static int prune_corrupt_blocks(struct btrfs_fs_info *info) { + struct btrfs_trans_handle *trans = NULL; struct cache_extent *cache; struct btrfs_corrupt_block *corrupt; - cache = search_cache_extent(info->corrupt_blocks, 0); while (1) { + cache = search_cache_extent(info->corrupt_blocks, 0); if (!cache) break; + if (!trans) { + trans = btrfs_start_transaction(info->extent_root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); + } corrupt = container_of(cache, struct btrfs_corrupt_block, cache); prune_one_block(trans, info, corrupt); - cache = next_cache_extent(cache); + remove_cache_extent(info->corrupt_blocks, cache); } + if (trans) + return btrfs_commit_transaction(trans, info->extent_root); return 0; } @@ -7219,8 +7767,7 @@ static void reset_cached_block_groups(struct btrfs_fs_info *fs_info) } } -static int check_extent_refs(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int check_extent_refs(struct btrfs_root *root, struct cache_tree *extent_cache) { struct extent_record *rec; @@ -7241,30 +7788,35 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, cache = search_cache_extent(extent_cache, 0); while(cache) { rec = container_of(cache, struct extent_record, cache); - btrfs_pin_extent(root->fs_info, - rec->start, rec->max_size); + set_extent_dirty(root->fs_info->excluded_extents, + rec->start, + rec->start + rec->max_size - 1, + GFP_NOFS); cache = next_cache_extent(cache); } /* pin down all the corrupted blocks too */ cache = search_cache_extent(root->fs_info->corrupt_blocks, 0); while(cache) { - btrfs_pin_extent(root->fs_info, - cache->start, cache->size); + set_extent_dirty(root->fs_info->excluded_extents, + cache->start, + cache->start + cache->size - 1, + GFP_NOFS); cache = next_cache_extent(cache); } - prune_corrupt_blocks(trans, root->fs_info); + prune_corrupt_blocks(root->fs_info); reset_cached_block_groups(root->fs_info); } + reset_cached_block_groups(root->fs_info); + /* * We need to delete any duplicate entries we find first otherwise we * could mess up the extent tree when we have backrefs that actually * belong to a different extent item and not the weird duplicate one. */ while (repair && !list_empty(&duplicate_extents)) { - rec = list_entry(duplicate_extents.next, struct extent_record, - list); + rec = to_extent_record(duplicate_extents.next); list_del_init(&rec->list); /* Sometimes we can find a backref before we find an actual @@ -7276,7 +7828,7 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, */ if (process_duplicates(root, extent_cache, rec)) continue; - ret = delete_duplicate_records(trans, root, rec); + ret = delete_duplicate_records(root, rec); if (ret < 0) return ret; /* @@ -7292,6 +7844,8 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, return -EAGAIN; while(1) { + int cur_err = 0; + fixed = 0; recorded = 0; cache = search_cache_extent(extent_cache, 0); @@ -7302,6 +7856,7 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, fprintf(stderr, "extent item %llu has multiple extent " "items\n", (unsigned long long)rec->start); err = 1; + cur_err = 1; } if (rec->refs != rec->extent_item_refs) { @@ -7322,7 +7877,7 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, * extent, let the fallback method handle it. */ if (!fixed && repair) { - ret = fixup_extent_refs(trans, + ret = fixup_extent_refs( root->fs_info, extent_cache, rec); if (ret) @@ -7331,7 +7886,7 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, } } err = 1; - + cur_err = 1; } if (all_backpointers_checked(rec, 1)) { fprintf(stderr, "backpointer mismatch on [%llu %llu]\n", @@ -7339,12 +7894,13 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, (unsigned long long)rec->nr); if (!fixed && !recorded && repair) { - ret = fixup_extent_refs(trans, root->fs_info, + ret = fixup_extent_refs(root->fs_info, extent_cache, rec); if (ret) goto repair_abort; fixed = 1; } + cur_err = 1; err = 1; } if (!rec->owner_ref_checked) { @@ -7352,17 +7908,55 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, (unsigned long long)rec->start, (unsigned long long)rec->nr); if (!fixed && !recorded && repair) { - ret = fixup_extent_refs(trans, root->fs_info, + ret = fixup_extent_refs(root->fs_info, extent_cache, rec); if (ret) goto repair_abort; fixed = 1; } err = 1; + cur_err = 1; + } + if (rec->bad_full_backref) { + fprintf(stderr, "bad full backref, on [%llu]\n", + (unsigned long long)rec->start); + if (repair) { + ret = fixup_extent_flags(root->fs_info, rec); + if (ret) + goto repair_abort; + fixed = 1; + } + err = 1; + cur_err = 1; + } + /* + * Although it's not a extent ref's problem, we reuse this + * routine for error reporting. + * No repair function yet. + */ + if (rec->crossing_stripes) { + fprintf(stderr, + "bad metadata [%llu, %llu) crossing stripe boundary\n", + rec->start, rec->start + rec->max_size); + err = 1; + cur_err = 1; + } + + if (rec->wrong_chunk_type) { + fprintf(stderr, + "bad extent [%llu, %llu), type mismatch with chunk\n", + rec->start, rec->start + rec->max_size); + err = 1; + cur_err = 1; } remove_cache_extent(extent_cache, cache); free_all_extent_backrefs(rec); + if (!init_extent_tree && repair && (!cur_err || fixed)) + clear_extent_dirty(root->fs_info->excluded_extents, + rec->start, + rec->start + rec->max_size - 1, + GFP_NOFS); free(rec); } repair_abort: @@ -7371,7 +7965,19 @@ repair_abort: fprintf(stderr, "failed to repair damaged filesystem, aborting\n"); exit(1); } else if (!ret) { + struct btrfs_trans_handle *trans; + + root = root->fs_info->extent_root; + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto repair_abort; + } + btrfs_fix_block_accounting(trans, root); + ret = btrfs_commit_transaction(trans, root); + if (ret) + goto repair_abort; } if (err) fprintf(stderr, "repaired damaged extent references\n"); @@ -7421,6 +8027,7 @@ static int check_chunk_refs(struct chunk_record *chunk_rec, u64 devid; u64 offset; u64 length; + int metadump_v2 = 0; int i; int ret = 0; @@ -7433,7 +8040,8 @@ static int check_chunk_refs(struct chunk_record *chunk_rec, cache); if (chunk_rec->length != block_group_rec->offset || chunk_rec->offset != block_group_rec->objectid || - chunk_rec->type_flags != block_group_rec->flags) { + (!metadump_v2 && + chunk_rec->type_flags != block_group_rec->flags)) { if (!silent) fprintf(stderr, "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n", @@ -7467,6 +8075,9 @@ static int check_chunk_refs(struct chunk_record *chunk_rec, ret = 1; } + if (metadump_v2) + return ret; + length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length, chunk_rec->num_stripes); for (i = 0; i < chunk_rec->num_stripes; ++i) { @@ -7533,7 +8144,7 @@ int check_chunks(struct cache_tree *chunk_cache, cache); err = check_chunk_refs(chunk_rec, block_group_cache, dev_extent_cache, silent); - if (err) + if (err < 0) ret = err; if (err == 0 && good) list_add_tail(&chunk_rec->list, good); @@ -7632,7 +8243,7 @@ static int check_devices(struct rb_root *dev_cache, } static int add_root_item_to_list(struct list_head *head, - u64 objectid, u64 bytenr, + u64 objectid, u64 bytenr, u64 last_snapshot, u8 level, u8 drop_level, int level_size, struct btrfs_key *drop_key) { @@ -7646,6 +8257,7 @@ static int add_root_item_to_list(struct list_head *head, ri_rec->level = level; ri_rec->level_size = level_size; ri_rec->drop_level = drop_level; + ri_rec->last_snapshot = last_snapshot; if (drop_key) memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key)); list_add_tail(&ri_rec->list, head); @@ -7653,8 +8265,19 @@ static int add_root_item_to_list(struct list_head *head, return 0; } +static void free_root_item_list(struct list_head *list) +{ + struct root_item_record *ri_rec; + + while (!list_empty(list)) { + ri_rec = list_first_entry(list, struct root_item_record, + list); + list_del_init(&ri_rec->list); + free(ri_rec); + } +} + static int deal_root_from_list(struct list_head *list, - struct btrfs_trans_handle *trans, struct btrfs_root *root, struct block_info *bits, int bits_nr, @@ -7691,29 +8314,25 @@ static int deal_root_from_list(struct list_head *list, * one by one, otherwise we deal with node firstly which * can maximize readahead. */ - if (!init_extent_tree && !rec->drop_level) - goto skip; while (1) { - ret = run_next_block(trans, root, bits, bits_nr, &last, - pending, seen, reada, - nodes, extent_cache, - chunk_cache, dev_cache, - block_group_cache, + ret = run_next_block(root, bits, bits_nr, &last, + pending, seen, reada, nodes, + extent_cache, chunk_cache, + dev_cache, block_group_cache, dev_extent_cache, rec); if (ret != 0) break; } -skip: free_extent_buffer(buf); list_del(&rec->list); free(rec); + if (ret < 0) + break; } while (ret >= 0) { - ret = run_next_block(trans, root, bits, bits_nr, &last, - pending, seen, reada, - nodes, extent_cache, - chunk_cache, dev_cache, - block_group_cache, + ret = run_next_block(root, bits, bits_nr, &last, pending, seen, + reada, nodes, extent_cache, chunk_cache, + dev_cache, block_group_cache, dev_extent_cache, NULL); if (ret != 0) { if (ret > 0) @@ -7735,6 +8354,7 @@ static int check_chunks_and_extents(struct btrfs_root *root) struct cache_tree pending; struct cache_tree reada; struct cache_tree nodes; + struct extent_io_tree excluded_extents; struct cache_tree corrupt_blocks; struct btrfs_path path; struct btrfs_key key; @@ -7743,7 +8363,6 @@ static int check_chunks_and_extents(struct btrfs_root *root) struct block_info *bits; int bits_nr; struct extent_buffer *leaf; - struct btrfs_trans_handle *trans = NULL; int slot; struct btrfs_root_item ri; struct list_head dropping_trees; @@ -7764,15 +8383,12 @@ static int check_chunks_and_extents(struct btrfs_root *root) cache_tree_init(&nodes); cache_tree_init(&reada); cache_tree_init(&corrupt_blocks); + extent_io_tree_init(&excluded_extents); INIT_LIST_HEAD(&dropping_trees); INIT_LIST_HEAD(&normal_trees); if (repair) { - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - fprintf(stderr, "Error starting transaction\n"); - return PTR_ERR(trans); - } + root->fs_info->excluded_extents = &excluded_extents; root->fs_info->fsck_extent_cache = &extent_cache; root->fs_info->free_extent_hook = free_extent_hook; root->fs_info->corrupt_blocks = &corrupt_blocks; @@ -7785,143 +8401,1613 @@ static int check_chunks_and_extents(struct btrfs_root *root) exit(1); } -again: - root1 = root->fs_info->tree_root; - level = btrfs_header_level(root1->node); - ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid, - root1->node->start, level, 0, - btrfs_level_size(root1, level), NULL); - if (ret < 0) - goto out; + if (ctx.progress_enabled) { + ctx.tp = TASK_EXTENTS; + task_start(ctx.info); + } + +again: + root1 = root->fs_info->tree_root; + level = btrfs_header_level(root1->node); + ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid, + root1->node->start, 0, level, 0, + root1->nodesize, NULL); + if (ret < 0) + goto out; + root1 = root->fs_info->chunk_root; + level = btrfs_header_level(root1->node); + ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid, + root1->node->start, 0, level, 0, + root1->nodesize, NULL); + if (ret < 0) + goto out; + btrfs_init_path(&path); + key.offset = 0; + key.objectid = 0; + btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + ret = btrfs_search_slot(NULL, root->fs_info->tree_root, + &key, &path, 0, 0); + if (ret < 0) + goto out; + while(1) { + leaf = path.nodes[0]; + slot = path.slots[0]; + if (slot >= btrfs_header_nritems(path.nodes[0])) { + ret = btrfs_next_leaf(root, &path); + if (ret != 0) + break; + leaf = path.nodes[0]; + slot = path.slots[0]; + } + btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]); + if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) { + unsigned long offset; + u64 last_snapshot; + + offset = btrfs_item_ptr_offset(leaf, path.slots[0]); + read_extent_buffer(leaf, &ri, offset, sizeof(ri)); + last_snapshot = btrfs_root_last_snapshot(&ri); + if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) { + level = btrfs_root_level(&ri); + level_size = root->nodesize; + ret = add_root_item_to_list(&normal_trees, + found_key.objectid, + btrfs_root_bytenr(&ri), + last_snapshot, level, + 0, level_size, NULL); + if (ret < 0) + goto out; + } else { + level = btrfs_root_level(&ri); + level_size = root->nodesize; + objectid = found_key.objectid; + btrfs_disk_key_to_cpu(&found_key, + &ri.drop_progress); + ret = add_root_item_to_list(&dropping_trees, + objectid, + btrfs_root_bytenr(&ri), + last_snapshot, level, + ri.drop_level, + level_size, &found_key); + if (ret < 0) + goto out; + } + } + path.slots[0]++; + } + btrfs_release_path(&path); + + /* + * check_block can return -EAGAIN if it fixes something, please keep + * this in mind when dealing with return values from these functions, if + * we get -EAGAIN we want to fall through and restart the loop. + */ + ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending, + &seen, &reada, &nodes, &extent_cache, + &chunk_cache, &dev_cache, &block_group_cache, + &dev_extent_cache); + if (ret < 0) { + if (ret == -EAGAIN) + goto loop; + goto out; + } + ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr, + &pending, &seen, &reada, &nodes, + &extent_cache, &chunk_cache, &dev_cache, + &block_group_cache, &dev_extent_cache); + if (ret < 0) { + if (ret == -EAGAIN) + goto loop; + goto out; + } + + ret = check_chunks(&chunk_cache, &block_group_cache, + &dev_extent_cache, NULL, NULL, NULL, 0); + if (ret) { + if (ret == -EAGAIN) + goto loop; + err = ret; + } + + ret = check_extent_refs(root, &extent_cache); + if (ret < 0) { + if (ret == -EAGAIN) + goto loop; + goto out; + } + + ret = check_devices(&dev_cache, &dev_extent_cache); + if (ret && err) + ret = err; + +out: + task_stop(ctx.info); + if (repair) { + free_corrupt_blocks_tree(root->fs_info->corrupt_blocks); + extent_io_tree_cleanup(&excluded_extents); + root->fs_info->fsck_extent_cache = NULL; + root->fs_info->free_extent_hook = NULL; + root->fs_info->corrupt_blocks = NULL; + root->fs_info->excluded_extents = NULL; + } + free(bits); + free_chunk_cache_tree(&chunk_cache); + free_device_cache_tree(&dev_cache); + free_block_group_tree(&block_group_cache); + free_device_extent_tree(&dev_extent_cache); + free_extent_cache_tree(&seen); + free_extent_cache_tree(&pending); + free_extent_cache_tree(&reada); + free_extent_cache_tree(&nodes); + return ret; +loop: + free_corrupt_blocks_tree(root->fs_info->corrupt_blocks); + free_extent_cache_tree(&seen); + free_extent_cache_tree(&pending); + free_extent_cache_tree(&reada); + free_extent_cache_tree(&nodes); + free_chunk_cache_tree(&chunk_cache); + free_block_group_tree(&block_group_cache); + free_device_cache_tree(&dev_cache); + free_device_extent_tree(&dev_extent_cache); + free_extent_record_cache(root->fs_info, &extent_cache); + free_root_item_list(&normal_trees); + free_root_item_list(&dropping_trees); + extent_io_tree_cleanup(&excluded_extents); + goto again; +} + +/* + * Check backrefs of a tree block given by @bytenr or @eb. + * + * @root: the root containing the @bytenr or @eb + * @eb: tree block extent buffer, can be NULL + * @bytenr: bytenr of the tree block to search + * @level: tree level of the tree block + * @owner: owner of the tree block + * + * Return >0 for any error found and output error message + * Return 0 for no error found + */ +static int check_tree_block_ref(struct btrfs_root *root, + struct extent_buffer *eb, u64 bytenr, + int level, u64 owner) +{ + struct btrfs_key key; + struct btrfs_root *extent_root = root->fs_info->extent_root; + struct btrfs_path path; + struct btrfs_extent_item *ei; + struct btrfs_extent_inline_ref *iref; + struct extent_buffer *leaf; + unsigned long end; + unsigned long ptr; + int slot; + int skinny_level; + int type; + u32 nodesize = root->nodesize; + u32 item_size; + u64 offset; + int found_ref = 0; + int err = 0; + int ret; + + btrfs_init_path(&path); + key.objectid = bytenr; + if (btrfs_fs_incompat(root->fs_info, + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)) + key.type = BTRFS_METADATA_ITEM_KEY; + else + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = (u64)-1; + + /* Search for the backref in extent tree */ + ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); + if (ret < 0) { + err |= BACKREF_MISSING; + goto out; + } + ret = btrfs_previous_extent_item(extent_root, &path, bytenr); + if (ret) { + err |= BACKREF_MISSING; + goto out; + } + + leaf = path.nodes[0]; + slot = path.slots[0]; + btrfs_item_key_to_cpu(leaf, &key, slot); + + ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); + + if (key.type == BTRFS_METADATA_ITEM_KEY) { + skinny_level = (int)key.offset; + iref = (struct btrfs_extent_inline_ref *)(ei + 1); + } else { + struct btrfs_tree_block_info *info; + + info = (struct btrfs_tree_block_info *)(ei + 1); + skinny_level = btrfs_tree_block_level(leaf, info); + iref = (struct btrfs_extent_inline_ref *)(info + 1); + } + + if (eb) { + u64 header_gen; + u64 extent_gen; + + if (!(btrfs_extent_flags(leaf, ei) & + BTRFS_EXTENT_FLAG_TREE_BLOCK)) { + error( + "extent[%llu %u] backref type mismatch, missing bit: %llx", + key.objectid, nodesize, + BTRFS_EXTENT_FLAG_TREE_BLOCK); + err = BACKREF_MISMATCH; + } + header_gen = btrfs_header_generation(eb); + extent_gen = btrfs_extent_generation(leaf, ei); + if (header_gen != extent_gen) { + error( + "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu", + key.objectid, nodesize, header_gen, + extent_gen); + err = BACKREF_MISMATCH; + } + if (level != skinny_level) { + error( + "extent[%llu %u] level mismatch, wanted: %u, have: %u", + key.objectid, nodesize, level, skinny_level); + err = BACKREF_MISMATCH; + } + if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) { + error( + "extent[%llu %u] is referred by other roots than %llu", + key.objectid, nodesize, root->objectid); + err = BACKREF_MISMATCH; + } + } + + /* + * Iterate the extent/metadata item to find the exact backref + */ + item_size = btrfs_item_size_nr(leaf, slot); + ptr = (unsigned long)iref; + end = (unsigned long)ei + item_size; + while (ptr < end) { + iref = (struct btrfs_extent_inline_ref *)ptr; + type = btrfs_extent_inline_ref_type(leaf, iref); + offset = btrfs_extent_inline_ref_offset(leaf, iref); + + if (type == BTRFS_TREE_BLOCK_REF_KEY && + (offset == root->objectid || offset == owner)) { + found_ref = 1; + } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) { + /* Check if the backref points to valid referencer */ + found_ref = !check_tree_block_ref(root, NULL, offset, + level + 1, owner); + } + + if (found_ref) + break; + ptr += btrfs_extent_inline_ref_size(type); + } + + /* + * Inlined extent item doesn't have what we need, check + * TREE_BLOCK_REF_KEY + */ + if (!found_ref) { + btrfs_release_path(&path); + key.objectid = bytenr; + key.type = BTRFS_TREE_BLOCK_REF_KEY; + key.offset = root->objectid; + + ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); + if (!ret) + found_ref = 1; + } + if (!found_ref) + err |= BACKREF_MISSING; +out: + btrfs_release_path(&path); + if (eb && (err & BACKREF_MISSING)) + error("extent[%llu %u] backref lost (owner: %llu, level: %u)", + bytenr, nodesize, owner, level); + return err; +} + +/* + * Check EXTENT_DATA item, mainly for its dbackref in extent tree + * + * Return >0 any error found and output error message + * Return 0 for no error found + */ +static int check_extent_data_item(struct btrfs_root *root, + struct extent_buffer *eb, int slot) +{ + struct btrfs_file_extent_item *fi; + struct btrfs_path path; + struct btrfs_root *extent_root = root->fs_info->extent_root; + struct btrfs_key fi_key; + struct btrfs_key dbref_key; + struct extent_buffer *leaf; + struct btrfs_extent_item *ei; + struct btrfs_extent_inline_ref *iref; + struct btrfs_extent_data_ref *dref; + u64 owner; + u64 file_extent_gen; + u64 disk_bytenr; + u64 disk_num_bytes; + u64 extent_num_bytes; + u64 extent_flags; + u64 extent_gen; + u32 item_size; + unsigned long end; + unsigned long ptr; + int type; + u64 ref_root; + int found_dbackref = 0; + int err = 0; + int ret; + + btrfs_item_key_to_cpu(eb, &fi_key, slot); + fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); + file_extent_gen = btrfs_file_extent_generation(eb, fi); + + /* Nothing to check for hole and inline data extents */ + if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE || + btrfs_file_extent_disk_bytenr(eb, fi) == 0) + return 0; + + disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi); + disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); + extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi); + + /* Check unaligned disk_num_bytes and num_bytes */ + if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) { + error( +"file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u", + fi_key.objectid, fi_key.offset, disk_num_bytes, + root->sectorsize); + err |= BYTES_UNALIGNED; + } else { + data_bytes_allocated += disk_num_bytes; + } + if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) { + error( +"file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u", + fi_key.objectid, fi_key.offset, extent_num_bytes, + root->sectorsize); + err |= BYTES_UNALIGNED; + } else { + data_bytes_referenced += extent_num_bytes; + } + owner = btrfs_header_owner(eb); + + /* Check the extent item of the file extent in extent tree */ + btrfs_init_path(&path); + dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi); + dbref_key.type = BTRFS_EXTENT_ITEM_KEY; + dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi); + + ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0); + if (ret) { + err |= BACKREF_MISSING; + goto error; + } + + leaf = path.nodes[0]; + slot = path.slots[0]; + ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); + + extent_flags = btrfs_extent_flags(leaf, ei); + extent_gen = btrfs_extent_generation(leaf, ei); + + if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) { + error( + "extent[%llu %llu] backref type mismatch, wanted bit: %llx", + disk_bytenr, disk_num_bytes, + BTRFS_EXTENT_FLAG_DATA); + err |= BACKREF_MISMATCH; + } + + if (file_extent_gen < extent_gen) { + error( +"extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu", + disk_bytenr, disk_num_bytes, file_extent_gen, + extent_gen); + err |= BACKREF_MISMATCH; + } + + /* Check data backref inside that extent item */ + item_size = btrfs_item_size_nr(leaf, path.slots[0]); + iref = (struct btrfs_extent_inline_ref *)(ei + 1); + ptr = (unsigned long)iref; + end = (unsigned long)ei + item_size; + while (ptr < end) { + iref = (struct btrfs_extent_inline_ref *)ptr; + type = btrfs_extent_inline_ref_type(leaf, iref); + dref = (struct btrfs_extent_data_ref *)(&iref->offset); + + if (type == BTRFS_EXTENT_DATA_REF_KEY) { + ref_root = btrfs_extent_data_ref_root(leaf, dref); + if (ref_root == owner || ref_root == root->objectid) + found_dbackref = 1; + } else if (type == BTRFS_SHARED_DATA_REF_KEY) { + found_dbackref = !check_tree_block_ref(root, NULL, + btrfs_extent_inline_ref_offset(leaf, iref), + 0, owner); + } + + if (found_dbackref) + break; + ptr += btrfs_extent_inline_ref_size(type); + } + + /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */ + if (!found_dbackref) { + btrfs_release_path(&path); + + btrfs_init_path(&path); + dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi); + dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY; + dbref_key.offset = hash_extent_data_ref(root->objectid, + fi_key.objectid, fi_key.offset); + + ret = btrfs_search_slot(NULL, root->fs_info->extent_root, + &dbref_key, &path, 0, 0); + if (!ret) + found_dbackref = 1; + } + + if (!found_dbackref) + err |= BACKREF_MISSING; +error: + btrfs_release_path(&path); + if (err & BACKREF_MISSING) { + error("data extent[%llu %llu] backref lost", + disk_bytenr, disk_num_bytes); + } + return err; +} + +/* + * Get real tree block level for the case like shared block + * Return >= 0 as tree level + * Return <0 for error + */ +static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr) +{ + struct extent_buffer *eb; + struct btrfs_path path; + struct btrfs_key key; + struct btrfs_extent_item *ei; + u64 flags; + u64 transid; + u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); + u8 backref_level; + u8 header_level; + int ret; + + /* Search extent tree for extent generation and level */ + key.objectid = bytenr; + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = (u64)-1; + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0); + if (ret < 0) + goto release_out; + ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr); + if (ret < 0) + goto release_out; + if (ret > 0) { + ret = -ENOENT; + goto release_out; + } + + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + ei = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_extent_item); + flags = btrfs_extent_flags(path.nodes[0], ei); + if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) { + ret = -ENOENT; + goto release_out; + } + + /* Get transid for later read_tree_block() check */ + transid = btrfs_extent_generation(path.nodes[0], ei); + + /* Get backref level as one source */ + if (key.type == BTRFS_METADATA_ITEM_KEY) { + backref_level = key.offset; + } else { + struct btrfs_tree_block_info *info; + + info = (struct btrfs_tree_block_info *)(ei + 1); + backref_level = btrfs_tree_block_level(path.nodes[0], info); + } + btrfs_release_path(&path); + + /* Get level from tree block as an alternative source */ + eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid); + if (!extent_buffer_uptodate(eb)) { + free_extent_buffer(eb); + return -EIO; + } + header_level = btrfs_header_level(eb); + free_extent_buffer(eb); + + if (header_level != backref_level) + return -EIO; + return header_level; + +release_out: + btrfs_release_path(&path); + return ret; +} + +/* + * Check if a tree block backref is valid (points to a valid tree block) + * if level == -1, level will be resolved + * Return >0 for any error found and print error message + */ +static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id, + u64 bytenr, int level) +{ + struct btrfs_root *root; + struct btrfs_key key; + struct btrfs_path path; + struct extent_buffer *eb; + struct extent_buffer *node; + u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); + int err = 0; + int ret; + + /* Query level for level == -1 special case */ + if (level == -1) + level = query_tree_block_level(fs_info, bytenr); + if (level < 0) { + err |= REFERENCER_MISSING; + goto out; + } + + key.objectid = root_id; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + + root = btrfs_read_fs_root(fs_info, &key); + if (IS_ERR(root)) { + err |= REFERENCER_MISSING; + goto out; + } + + /* Read out the tree block to get item/node key */ + eb = read_tree_block(root, bytenr, root->nodesize, 0); + if (!extent_buffer_uptodate(eb)) { + err |= REFERENCER_MISSING; + free_extent_buffer(eb); + goto out; + } + + /* Empty tree, no need to check key */ + if (!btrfs_header_nritems(eb) && !level) { + free_extent_buffer(eb); + goto out; + } + + if (level) + btrfs_node_key_to_cpu(eb, &key, 0); + else + btrfs_item_key_to_cpu(eb, &key, 0); + + free_extent_buffer(eb); + + btrfs_init_path(&path); + /* Search with the first key, to ensure we can reach it */ + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret) { + err |= REFERENCER_MISSING; + goto release_out; + } + + node = path.nodes[level]; + if (btrfs_header_bytenr(node) != bytenr) { + error( + "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu", + bytenr, nodesize, bytenr, + btrfs_header_bytenr(node)); + err |= REFERENCER_MISMATCH; + } + if (btrfs_header_level(node) != level) { + error( + "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d", + bytenr, nodesize, level, + btrfs_header_level(node)); + err |= REFERENCER_MISMATCH; + } + +release_out: + btrfs_release_path(&path); +out: + if (err & REFERENCER_MISSING) { + if (level < 0) + error("extent [%llu %d] lost referencer (owner: %llu)", + bytenr, nodesize, root_id); + else + error( + "extent [%llu %d] lost referencer (owner: %llu, level: %u)", + bytenr, nodesize, root_id, level); + } + + return err; +} + +/* + * Check referencer for shared block backref + * If level == -1, this function will resolve the level. + */ +static int check_shared_block_backref(struct btrfs_fs_info *fs_info, + u64 parent, u64 bytenr, int level) +{ + struct extent_buffer *eb; + u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); + u32 nr; + int found_parent = 0; + int i; + + eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0); + if (!extent_buffer_uptodate(eb)) + goto out; + + if (level == -1) + level = query_tree_block_level(fs_info, bytenr); + if (level < 0) + goto out; + + if (level + 1 != btrfs_header_level(eb)) + goto out; + + nr = btrfs_header_nritems(eb); + for (i = 0; i < nr; i++) { + if (bytenr == btrfs_node_blockptr(eb, i)) { + found_parent = 1; + break; + } + } +out: + free_extent_buffer(eb); + if (!found_parent) { + error( + "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)", + bytenr, nodesize, parent, level); + return REFERENCER_MISSING; + } + return 0; +} + +/* + * Check referencer for normal (inlined) data ref + * If len == 0, it will be resolved by searching in extent tree + */ +static int check_extent_data_backref(struct btrfs_fs_info *fs_info, + u64 root_id, u64 objectid, u64 offset, + u64 bytenr, u64 len, u32 count) +{ + struct btrfs_root *root; + struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_key key; + struct btrfs_path path; + struct extent_buffer *leaf; + struct btrfs_file_extent_item *fi; + u32 found_count = 0; + int slot; + int ret = 0; + + if (!len) { + key.objectid = bytenr; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = (u64)-1; + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); + if (ret < 0) + goto out; + ret = btrfs_previous_extent_item(extent_root, &path, bytenr); + if (ret) + goto out; + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + if (key.objectid != bytenr || + key.type != BTRFS_EXTENT_ITEM_KEY) + goto out; + len = key.offset; + btrfs_release_path(&path); + } + key.objectid = root_id; + btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + key.offset = (u64)-1; + btrfs_init_path(&path); + + root = btrfs_read_fs_root(fs_info, &key); + if (IS_ERR(root)) + goto out; + + key.objectid = objectid; + key.type = BTRFS_EXTENT_DATA_KEY; + /* + * It can be nasty as data backref offset is + * file offset - file extent offset, which is smaller or + * equal to original backref offset. The only special case is + * overflow. So we need to special check and do further search. + */ + key.offset = offset & (1ULL << 63) ? 0 : offset; + + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret < 0) + goto out; + + /* + * Search afterwards to get correct one + * NOTE: As we must do a comprehensive check on the data backref to + * make sure the dref count also matches, we must iterate all file + * extents for that inode. + */ + while (1) { + leaf = path.nodes[0]; + slot = path.slots[0]; + + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY) + break; + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); + /* + * Except normal disk bytenr and disk num bytes, we still + * need to do extra check on dbackref offset as + * dbackref offset = file_offset - file_extent_offset + */ + if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr && + btrfs_file_extent_disk_num_bytes(leaf, fi) == len && + (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) == + offset) + found_count++; + + ret = btrfs_next_item(root, &path); + if (ret) + break; + } +out: + btrfs_release_path(&path); + if (found_count != count) { + error( +"extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u", + bytenr, len, root_id, objectid, offset, count, found_count); + return REFERENCER_MISSING; + } + return 0; +} + +/* + * Check if the referencer of a shared data backref exists + */ +static int check_shared_data_backref(struct btrfs_fs_info *fs_info, + u64 parent, u64 bytenr) +{ + struct extent_buffer *eb; + struct btrfs_key key; + struct btrfs_file_extent_item *fi; + u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); + u32 nr; + int found_parent = 0; + int i; + + eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0); + if (!extent_buffer_uptodate(eb)) + goto out; + + nr = btrfs_header_nritems(eb); + for (i = 0; i < nr; i++) { + btrfs_item_key_to_cpu(eb, &key, i); + if (key.type != BTRFS_EXTENT_DATA_KEY) + continue; + + fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item); + if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE) + continue; + + if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) { + found_parent = 1; + break; + } + } + +out: + free_extent_buffer(eb); + if (!found_parent) { + error("shared extent %llu referencer lost (parent: %llu)", + bytenr, parent); + return REFERENCER_MISSING; + } + return 0; +} + +/* + * This function will check a given extent item, including its backref and + * itself (like crossing stripe boundary and type) + * + * Since we don't use extent_record anymore, introduce new error bit + */ +static int check_extent_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot) +{ + struct btrfs_extent_item *ei; + struct btrfs_extent_inline_ref *iref; + struct btrfs_extent_data_ref *dref; + unsigned long end; + unsigned long ptr; + int type; + u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); + u32 item_size = btrfs_item_size_nr(eb, slot); + u64 flags; + u64 offset; + int metadata = 0; + int level; + struct btrfs_key key; + int ret; + int err = 0; + + btrfs_item_key_to_cpu(eb, &key, slot); + if (key.type == BTRFS_EXTENT_ITEM_KEY) + bytes_used += key.offset; + else + bytes_used += nodesize; + + if (item_size < sizeof(*ei)) { + /* + * COMPAT_EXTENT_TREE_V0 case, but it's already a super + * old thing when on disk format is still un-determined. + * No need to care about it anymore + */ + error("unsupported COMPAT_EXTENT_TREE_V0 detected"); + return -ENOTTY; + } + + ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item); + flags = btrfs_extent_flags(eb, ei); + + if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) + metadata = 1; + if (metadata && check_crossing_stripes(key.objectid, eb->len)) { + error("bad metadata [%llu, %llu) crossing stripe boundary", + key.objectid, key.objectid + nodesize); + err |= CROSSING_STRIPE_BOUNDARY; + } + + ptr = (unsigned long)(ei + 1); + + if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) { + /* Old EXTENT_ITEM metadata */ + struct btrfs_tree_block_info *info; + + info = (struct btrfs_tree_block_info *)ptr; + level = btrfs_tree_block_level(eb, info); + ptr += sizeof(struct btrfs_tree_block_info); + } else { + /* New METADATA_ITEM */ + level = key.offset; + } + end = (unsigned long)ei + item_size; + + if (ptr >= end) { + err |= ITEM_SIZE_MISMATCH; + goto out; + } + + /* Now check every backref in this extent item */ +next: + iref = (struct btrfs_extent_inline_ref *)ptr; + type = btrfs_extent_inline_ref_type(eb, iref); + offset = btrfs_extent_inline_ref_offset(eb, iref); + switch (type) { + case BTRFS_TREE_BLOCK_REF_KEY: + ret = check_tree_block_backref(fs_info, offset, key.objectid, + level); + err |= ret; + break; + case BTRFS_SHARED_BLOCK_REF_KEY: + ret = check_shared_block_backref(fs_info, offset, key.objectid, + level); + err |= ret; + break; + case BTRFS_EXTENT_DATA_REF_KEY: + dref = (struct btrfs_extent_data_ref *)(&iref->offset); + ret = check_extent_data_backref(fs_info, + btrfs_extent_data_ref_root(eb, dref), + btrfs_extent_data_ref_objectid(eb, dref), + btrfs_extent_data_ref_offset(eb, dref), + key.objectid, key.offset, + btrfs_extent_data_ref_count(eb, dref)); + err |= ret; + break; + case BTRFS_SHARED_DATA_REF_KEY: + ret = check_shared_data_backref(fs_info, offset, key.objectid); + err |= ret; + break; + default: + error("extent[%llu %d %llu] has unknown ref type: %d", + key.objectid, key.type, key.offset, type); + err |= UNKNOWN_TYPE; + goto out; + } + + ptr += btrfs_extent_inline_ref_size(type); + if (ptr < end) + goto next; + +out: + return err; +} + +/* + * Check if a dev extent item is referred correctly by its chunk + */ +static int check_dev_extent_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot) +{ + struct btrfs_root *chunk_root = fs_info->chunk_root; + struct btrfs_dev_extent *ptr; + struct btrfs_path path; + struct btrfs_key chunk_key; + struct btrfs_key devext_key; + struct btrfs_chunk *chunk; + struct extent_buffer *l; + int num_stripes; + u64 length; + int i; + int found_chunk = 0; + int ret; + + btrfs_item_key_to_cpu(eb, &devext_key, slot); + ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent); + length = btrfs_dev_extent_length(eb, ptr); + + chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr); + chunk_key.type = BTRFS_CHUNK_ITEM_KEY; + chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr); + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0); + if (ret) + goto out; + + l = path.nodes[0]; + chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk); + if (btrfs_chunk_length(l, chunk) != length) + goto out; + + num_stripes = btrfs_chunk_num_stripes(l, chunk); + for (i = 0; i < num_stripes; i++) { + u64 devid = btrfs_stripe_devid_nr(l, chunk, i); + u64 offset = btrfs_stripe_offset_nr(l, chunk, i); + + if (devid == devext_key.objectid && + offset == devext_key.offset) { + found_chunk = 1; + break; + } + } +out: + btrfs_release_path(&path); + if (!found_chunk) { + error( + "device extent[%llu, %llu, %llu] did not find the related chunk", + devext_key.objectid, devext_key.offset, length); + return REFERENCER_MISSING; + } + return 0; +} + +/* + * Check if the used space is correct with the dev item + */ +static int check_dev_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot) +{ + struct btrfs_root *dev_root = fs_info->dev_root; + struct btrfs_dev_item *dev_item; + struct btrfs_path path; + struct btrfs_key key; + struct btrfs_dev_extent *ptr; + u64 dev_id; + u64 used; + u64 total = 0; + int ret; + + dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item); + dev_id = btrfs_device_id(eb, dev_item); + used = btrfs_device_bytes_used(eb, dev_item); + + key.objectid = dev_id; + key.type = BTRFS_DEV_EXTENT_KEY; + key.offset = 0; + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0); + if (ret < 0) { + btrfs_item_key_to_cpu(eb, &key, slot); + error("cannot find any related dev extent for dev[%llu, %u, %llu]", + key.objectid, key.type, key.offset); + btrfs_release_path(&path); + return REFERENCER_MISSING; + } + + /* Iterate dev_extents to calculate the used space of a device */ + while (1) { + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + + if (key.objectid > dev_id) + break; + if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id) + goto next; + + ptr = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_dev_extent); + total += btrfs_dev_extent_length(path.nodes[0], ptr); +next: + ret = btrfs_next_item(dev_root, &path); + if (ret) + break; + } + btrfs_release_path(&path); + + if (used != total) { + btrfs_item_key_to_cpu(eb, &key, slot); + error( +"Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]", + total, used, BTRFS_ROOT_TREE_OBJECTID, + BTRFS_DEV_EXTENT_KEY, dev_id); + return ACCOUNTING_MISMATCH; + } + return 0; +} + +/* + * Check a block group item with its referener (chunk) and its used space + * with extent/metadata item + */ +static int check_block_group_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot) +{ + struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_root *chunk_root = fs_info->chunk_root; + struct btrfs_block_group_item *bi; + struct btrfs_block_group_item bg_item; + struct btrfs_path path; + struct btrfs_key bg_key; + struct btrfs_key chunk_key; + struct btrfs_key extent_key; + struct btrfs_chunk *chunk; + struct extent_buffer *leaf; + struct btrfs_extent_item *ei; + u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); + u64 flags; + u64 bg_flags; + u64 used; + u64 total = 0; + int ret; + int err = 0; + + btrfs_item_key_to_cpu(eb, &bg_key, slot); + bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item); + read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item)); + used = btrfs_block_group_used(&bg_item); + bg_flags = btrfs_block_group_flags(&bg_item); + + chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + chunk_key.type = BTRFS_CHUNK_ITEM_KEY; + chunk_key.offset = bg_key.objectid; + + btrfs_init_path(&path); + /* Search for the referencer chunk */ + ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0); + if (ret) { + error( + "block group[%llu %llu] did not find the related chunk item", + bg_key.objectid, bg_key.offset); + err |= REFERENCER_MISSING; + } else { + chunk = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_chunk); + if (btrfs_chunk_length(path.nodes[0], chunk) != + bg_key.offset) { + error( + "block group[%llu %llu] related chunk item length does not match", + bg_key.objectid, bg_key.offset); + err |= REFERENCER_MISMATCH; + } + } + btrfs_release_path(&path); + + /* Search from the block group bytenr */ + extent_key.objectid = bg_key.objectid; + extent_key.type = 0; + extent_key.offset = 0; + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0); + if (ret < 0) + goto out; + + /* Iterate extent tree to account used space */ + while (1) { + leaf = path.nodes[0]; + btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]); + if (extent_key.objectid >= bg_key.objectid + bg_key.offset) + break; + + if (extent_key.type != BTRFS_METADATA_ITEM_KEY && + extent_key.type != BTRFS_EXTENT_ITEM_KEY) + goto next; + if (extent_key.objectid < bg_key.objectid) + goto next; + + if (extent_key.type == BTRFS_METADATA_ITEM_KEY) + total += nodesize; + else + total += extent_key.offset; + + ei = btrfs_item_ptr(leaf, path.slots[0], + struct btrfs_extent_item); + flags = btrfs_extent_flags(leaf, ei); + if (flags & BTRFS_EXTENT_FLAG_DATA) { + if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) { + error( + "bad extent[%llu, %llu) type mismatch with chunk", + extent_key.objectid, + extent_key.objectid + extent_key.offset); + err |= CHUNK_TYPE_MISMATCH; + } + } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { + if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM | + BTRFS_BLOCK_GROUP_METADATA))) { + error( + "bad extent[%llu, %llu) type mismatch with chunk", + extent_key.objectid, + extent_key.objectid + nodesize); + err |= CHUNK_TYPE_MISMATCH; + } + } +next: + ret = btrfs_next_item(extent_root, &path); + if (ret) + break; + } + +out: + btrfs_release_path(&path); + + if (total != used) { + error( + "block group[%llu %llu] used %llu but extent items used %llu", + bg_key.objectid, bg_key.offset, used, total); + err |= ACCOUNTING_MISMATCH; + } + return err; +} + +/* + * Check a chunk item. + * Including checking all referred dev_extents and block group + */ +static int check_chunk_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot) +{ + struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_root *dev_root = fs_info->dev_root; + struct btrfs_path path; + struct btrfs_key chunk_key; + struct btrfs_key bg_key; + struct btrfs_key devext_key; + struct btrfs_chunk *chunk; + struct extent_buffer *leaf; + struct btrfs_block_group_item *bi; + struct btrfs_block_group_item bg_item; + struct btrfs_dev_extent *ptr; + u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy); + u64 length; + u64 chunk_end; + u64 type; + u64 profile; + int num_stripes; + u64 offset; + u64 objectid; + int i; + int ret; + int err = 0; + + btrfs_item_key_to_cpu(eb, &chunk_key, slot); + chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk); + length = btrfs_chunk_length(eb, chunk); + chunk_end = chunk_key.offset + length; + if (!IS_ALIGNED(length, sectorsize)) { + error("chunk[%llu %llu) not aligned to %u", + chunk_key.offset, chunk_end, sectorsize); + err |= BYTES_UNALIGNED; + goto out; + } + + type = btrfs_chunk_type(eb, chunk); + profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK; + if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { + error("chunk[%llu %llu) has no chunk type", + chunk_key.offset, chunk_end); + err |= UNKNOWN_TYPE; + } + if (profile && (profile & (profile - 1))) { + error("chunk[%llu %llu) multiple profiles detected: %llx", + chunk_key.offset, chunk_end, profile); + err |= UNKNOWN_TYPE; + } + + bg_key.objectid = chunk_key.offset; + bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; + bg_key.offset = length; + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0); + if (ret) { + error( + "chunk[%llu %llu) did not find the related block group item", + chunk_key.offset, chunk_end); + err |= REFERENCER_MISSING; + } else{ + leaf = path.nodes[0]; + bi = btrfs_item_ptr(leaf, path.slots[0], + struct btrfs_block_group_item); + read_extent_buffer(leaf, &bg_item, (unsigned long)bi, + sizeof(bg_item)); + if (btrfs_block_group_flags(&bg_item) != type) { + error( +"chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu", + chunk_key.offset, chunk_end, type, + btrfs_block_group_flags(&bg_item)); + err |= REFERENCER_MISSING; + } + } + + num_stripes = btrfs_chunk_num_stripes(eb, chunk); + for (i = 0; i < num_stripes; i++) { + btrfs_release_path(&path); + btrfs_init_path(&path); + devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i); + devext_key.type = BTRFS_DEV_EXTENT_KEY; + devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i); + + ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path, + 0, 0); + if (ret) + goto not_match_dev; + + leaf = path.nodes[0]; + ptr = btrfs_item_ptr(leaf, path.slots[0], + struct btrfs_dev_extent); + objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr); + offset = btrfs_dev_extent_chunk_offset(leaf, ptr); + if (objectid != chunk_key.objectid || + offset != chunk_key.offset || + btrfs_dev_extent_length(leaf, ptr) != length) + goto not_match_dev; + continue; +not_match_dev: + err |= BACKREF_MISSING; + error( + "chunk[%llu %llu) stripe %d did not find the related dev extent", + chunk_key.objectid, chunk_end, i); + continue; + } + btrfs_release_path(&path); +out: + return err; +} + +/* + * Main entry function to check known items and update related accounting info + */ +static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_key key; + int slot = 0; + int type; + struct btrfs_extent_data_ref *dref; + int ret; + int err = 0; + +next: + btrfs_item_key_to_cpu(eb, &key, slot); + type = btrfs_key_type(&key); + + switch (type) { + case BTRFS_EXTENT_DATA_KEY: + ret = check_extent_data_item(root, eb, slot); + err |= ret; + break; + case BTRFS_BLOCK_GROUP_ITEM_KEY: + ret = check_block_group_item(fs_info, eb, slot); + err |= ret; + break; + case BTRFS_DEV_ITEM_KEY: + ret = check_dev_item(fs_info, eb, slot); + err |= ret; + break; + case BTRFS_CHUNK_ITEM_KEY: + ret = check_chunk_item(fs_info, eb, slot); + err |= ret; + break; + case BTRFS_DEV_EXTENT_KEY: + ret = check_dev_extent_item(fs_info, eb, slot); + err |= ret; + break; + case BTRFS_EXTENT_ITEM_KEY: + case BTRFS_METADATA_ITEM_KEY: + ret = check_extent_item(fs_info, eb, slot); + err |= ret; + break; + case BTRFS_EXTENT_CSUM_KEY: + total_csum_bytes += btrfs_item_size_nr(eb, slot); + break; + case BTRFS_TREE_BLOCK_REF_KEY: + ret = check_tree_block_backref(fs_info, key.offset, + key.objectid, -1); + err |= ret; + break; + case BTRFS_EXTENT_DATA_REF_KEY: + dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref); + ret = check_extent_data_backref(fs_info, + btrfs_extent_data_ref_root(eb, dref), + btrfs_extent_data_ref_objectid(eb, dref), + btrfs_extent_data_ref_offset(eb, dref), + key.objectid, 0, + btrfs_extent_data_ref_count(eb, dref)); + err |= ret; + break; + case BTRFS_SHARED_BLOCK_REF_KEY: + ret = check_shared_block_backref(fs_info, key.offset, + key.objectid, -1); + err |= ret; + break; + case BTRFS_SHARED_DATA_REF_KEY: + ret = check_shared_data_backref(fs_info, key.offset, + key.objectid); + err |= ret; + break; + default: + break; + } + + if (++slot < btrfs_header_nritems(eb)) + goto next; + + return err; +} + +/* + * Helper function for later fs/subvol tree check. To determine if a tree + * block should be checked. + * This function will ensure only the direct referencer with lowest rootid to + * check a fs/subvolume tree block. + * + * Backref check at extent tree would detect errors like missing subvolume + * tree, so we can do aggressive check to reduce duplicated checks. + */ +static int should_check(struct btrfs_root *root, struct extent_buffer *eb) +{ + struct btrfs_root *extent_root = root->fs_info->extent_root; + struct btrfs_key key; + struct btrfs_path path; + struct extent_buffer *leaf; + int slot; + struct btrfs_extent_item *ei; + unsigned long ptr; + unsigned long end; + int type; + u32 item_size; + u64 offset; + struct btrfs_extent_inline_ref *iref; + int ret; + + btrfs_init_path(&path); + key.objectid = btrfs_header_bytenr(eb); + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = (u64)-1; + + /* + * Any failure in backref resolving means we can't determine + * whom the tree block belongs to. + * So in that case, we need to check that tree block + */ + ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); + if (ret < 0) + goto need_check; + + ret = btrfs_previous_extent_item(extent_root, &path, + btrfs_header_bytenr(eb)); + if (ret) + goto need_check; + + leaf = path.nodes[0]; + slot = path.slots[0]; + btrfs_item_key_to_cpu(leaf, &key, slot); + ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); + + if (key.type == BTRFS_METADATA_ITEM_KEY) { + iref = (struct btrfs_extent_inline_ref *)(ei + 1); + } else { + struct btrfs_tree_block_info *info; + + info = (struct btrfs_tree_block_info *)(ei + 1); + iref = (struct btrfs_extent_inline_ref *)(info + 1); + } + + item_size = btrfs_item_size_nr(leaf, slot); + ptr = (unsigned long)iref; + end = (unsigned long)ei + item_size; + while (ptr < end) { + iref = (struct btrfs_extent_inline_ref *)ptr; + type = btrfs_extent_inline_ref_type(leaf, iref); + offset = btrfs_extent_inline_ref_offset(leaf, iref); + + /* + * We only check the tree block if current root is + * the lowest referencer of it. + */ + if (type == BTRFS_TREE_BLOCK_REF_KEY && + offset < root->objectid) { + btrfs_release_path(&path); + return 0; + } + + ptr += btrfs_extent_inline_ref_size(type); + } + /* + * Normally we should also check keyed tree block ref, but that may be + * very time consuming. Inlined ref should already make us skip a lot + * of refs now. So skip search keyed tree block ref. + */ + +need_check: + btrfs_release_path(&path); + return 1; +} + +/* + * Traversal function for tree block. We will do: + * 1) Skip shared fs/subvolume tree blocks + * 2) Update related bytes accounting + * 3) Pre-order traversal + */ +static int traverse_tree_block(struct btrfs_root *root, + struct extent_buffer *node) +{ + struct extent_buffer *eb; + int level; + u64 nr; + int i; + int err = 0; + int ret; + + /* + * Skip shared fs/subvolume tree block, in that case they will + * be checked by referencer with lowest rootid + */ + if (is_fstree(root->objectid) && !should_check(root, node)) + return 0; + + /* Update bytes accounting */ + total_btree_bytes += node->len; + if (fs_root_objectid(btrfs_header_owner(node))) + total_fs_tree_bytes += node->len; + if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID) + total_extent_tree_bytes += node->len; + if (!found_old_backref && + btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID && + btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV && + !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC)) + found_old_backref = 1; + + /* pre-order tranversal, check itself first */ + level = btrfs_header_level(node); + ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node), + btrfs_header_level(node), + btrfs_header_owner(node)); + err |= ret; + if (err) + error( + "check %s failed root %llu bytenr %llu level %d, force continue check", + level ? "node":"leaf", root->objectid, + btrfs_header_bytenr(node), btrfs_header_level(node)); + + if (!level) { + btree_space_waste += btrfs_leaf_free_space(root, node); + ret = check_leaf_items(root, node); + err |= ret; + return err; + } + + nr = btrfs_header_nritems(node); + btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) * + sizeof(struct btrfs_key_ptr); + + /* Then check all its children */ + for (i = 0; i < nr; i++) { + u64 blocknr = btrfs_node_blockptr(node, i); + + /* + * As a btrfs tree has most 8 levels (0..7), so it's quite safe + * to call the function itself. + */ + eb = read_tree_block(root, blocknr, root->nodesize, 0); + if (extent_buffer_uptodate(eb)) { + ret = traverse_tree_block(root, eb); + err |= ret; + } + free_extent_buffer(eb); + } + + return err; +} + +/* + * Low memory usage version check_chunks_and_extents. + */ +static int check_chunks_and_extents_v2(struct btrfs_root *root) +{ + struct btrfs_path path; + struct btrfs_key key; + struct btrfs_root *root1; + struct btrfs_root *cur_root; + int err = 0; + int ret; + root1 = root->fs_info->chunk_root; - level = btrfs_header_level(root1->node); - ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid, - root1->node->start, level, 0, - btrfs_level_size(root1, level), NULL); - if (ret < 0) - goto out; + ret = traverse_tree_block(root1, root1->node); + err |= ret; + + root1 = root->fs_info->tree_root; + ret = traverse_tree_block(root1, root1->node); + err |= ret; + btrfs_init_path(&path); + key.objectid = BTRFS_EXTENT_TREE_OBJECTID; key.offset = 0; - key.objectid = 0; - btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); - ret = btrfs_search_slot(NULL, root->fs_info->tree_root, - &key, &path, 0, 0); - if (ret < 0) - goto out; - while(1) { - leaf = path.nodes[0]; - slot = path.slots[0]; - if (slot >= btrfs_header_nritems(path.nodes[0])) { - ret = btrfs_next_leaf(root, &path); - if (ret != 0) - break; - leaf = path.nodes[0]; - slot = path.slots[0]; - } - btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]); - if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) { - unsigned long offset; + key.type = BTRFS_ROOT_ITEM_KEY; - offset = btrfs_item_ptr_offset(leaf, path.slots[0]); - read_extent_buffer(leaf, &ri, offset, sizeof(ri)); - if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) { - level = btrfs_root_level(&ri); - level_size = btrfs_level_size(root, level); - ret = add_root_item_to_list(&normal_trees, - found_key.objectid, - btrfs_root_bytenr(&ri), level, - 0, level_size, NULL); - if (ret < 0) - goto out; - } else { - level = btrfs_root_level(&ri); - level_size = btrfs_level_size(root, level); - objectid = found_key.objectid; - btrfs_disk_key_to_cpu(&found_key, - &ri.drop_progress); - ret = add_root_item_to_list(&dropping_trees, - objectid, - btrfs_root_bytenr(&ri), - level, ri.drop_level, - level_size, &found_key); - if (ret < 0) - goto out; - } - } - path.slots[0]++; - } - btrfs_release_path(&path); - ret = deal_root_from_list(&normal_trees, trans, root, - bits, bits_nr, &pending, &seen, - &reada, &nodes, &extent_cache, - &chunk_cache, &dev_cache, &block_group_cache, - &dev_extent_cache); - if (ret < 0) - goto out; - ret = deal_root_from_list(&dropping_trees, trans, root, - bits, bits_nr, &pending, &seen, - &reada, &nodes, &extent_cache, - &chunk_cache, &dev_cache, &block_group_cache, - &dev_extent_cache); - if (ret < 0) + ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0); + if (ret) { + error("cannot find extent treet in tree_root"); goto out; - if (ret >= 0) - ret = check_extent_refs(trans, root, &extent_cache); - if (ret == -EAGAIN) { - ret = btrfs_commit_transaction(trans, root); - if (ret) - goto out; + } - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto out; - } + while (1) { + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + if (key.type != BTRFS_ROOT_ITEM_KEY) + goto next; + key.offset = (u64)-1; - free_corrupt_blocks_tree(root->fs_info->corrupt_blocks); - free_extent_cache_tree(&seen); - free_extent_cache_tree(&pending); - free_extent_cache_tree(&reada); - free_extent_cache_tree(&nodes); - free_chunk_cache_tree(&chunk_cache); - free_block_group_tree(&block_group_cache); - free_device_cache_tree(&dev_cache); - free_device_extent_tree(&dev_extent_cache); - free_extent_record_cache(root->fs_info, &extent_cache); - goto again; - } + cur_root = btrfs_read_fs_root(root->fs_info, &key); + if (IS_ERR(cur_root) || !cur_root) { + error("failed to read tree: %lld", key.objectid); + goto next; + } - err = check_chunks(&chunk_cache, &block_group_cache, - &dev_extent_cache, NULL, NULL, NULL, 0); - if (err && !ret) - ret = err; + ret = traverse_tree_block(cur_root, cur_root->node); + err |= ret; - err = check_devices(&dev_cache, &dev_extent_cache); - if (err && !ret) - ret = err; +next: + ret = btrfs_next_item(root1, &path); + if (ret) + goto out; + } out: - if (trans) { - err = btrfs_commit_transaction(trans, root); - if (!ret) - ret = err; - } - if (repair) { - free_corrupt_blocks_tree(root->fs_info->corrupt_blocks); - root->fs_info->fsck_extent_cache = NULL; - root->fs_info->free_extent_hook = NULL; - root->fs_info->corrupt_blocks = NULL; - } - free(bits); - free_chunk_cache_tree(&chunk_cache); - free_device_cache_tree(&dev_cache); - free_block_group_tree(&block_group_cache); - free_device_extent_tree(&dev_extent_cache); - free_extent_cache_tree(&seen); - free_extent_cache_tree(&pending); - free_extent_cache_tree(&reada); - free_extent_cache_tree(&nodes); - return ret; + btrfs_release_path(&path); + return err; } static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans, @@ -7941,7 +10027,7 @@ static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans, goto init; } c = btrfs_alloc_free_block(trans, root, - btrfs_level_size(root, 0), + root->nodesize, root->root_key.objectid, &disk_key, level, 0, 0); if (IS_ERR(c)) { @@ -7998,7 +10084,7 @@ static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info, struct btrfs_root_item *ri; struct btrfs_key key; u64 bytenr; - u32 leafsize; + u32 nodesize; int level = btrfs_header_level(eb); int nritems; int ret; @@ -8015,7 +10101,7 @@ static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info, btrfs_pin_extent(fs_info, eb->start, eb->len); - leafsize = btrfs_super_leafsize(fs_info->super_copy); + nodesize = btrfs_super_nodesize(fs_info->super_copy); nritems = btrfs_header_nritems(eb); for (i = 0; i < nritems; i++) { if (level == 0) { @@ -8037,8 +10123,8 @@ static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info, * just pass in extent_root. */ tmp = read_tree_block(fs_info->extent_root, bytenr, - leafsize, 0); - if (!tmp) { + nodesize, 0); + if (!extent_buffer_uptodate(tmp)) { fprintf(stderr, "Error reading root block\n"); return -EIO; } @@ -8051,13 +10137,13 @@ static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info, /* If we aren't the tree root don't read the block */ if (level == 1 && !tree_root) { - btrfs_pin_extent(fs_info, bytenr, leafsize); + btrfs_pin_extent(fs_info, bytenr, nodesize); continue; } tmp = read_tree_block(fs_info->extent_root, bytenr, - leafsize, 0); - if (!tmp) { + nodesize, 0); + if (!extent_buffer_uptodate(tmp)) { fprintf(stderr, "Error reading tree block\n"); return -EIO; } @@ -8346,7 +10432,7 @@ static int reinit_extent_tree(struct btrfs_trans_handle *trans, ret = reset_balance(trans, fs_info); if (ret) - fprintf(stderr, "error reseting the pending balance\n"); + fprintf(stderr, "error resetting the pending balance\n"); return ret; } @@ -8474,8 +10560,142 @@ static int populate_csum(struct btrfs_trans_handle *trans, return ret; } -static int fill_csum_tree(struct btrfs_trans_handle *trans, - struct btrfs_root *csum_root) +static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans, + struct btrfs_root *csum_root, + struct btrfs_root *cur_root) +{ + struct btrfs_path *path; + struct btrfs_key key; + struct extent_buffer *node; + struct btrfs_file_extent_item *fi; + char *buf = NULL; + u64 start = 0; + u64 len = 0; + int slot = 0; + int ret = 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + buf = malloc(cur_root->fs_info->csum_root->sectorsize); + if (!buf) { + ret = -ENOMEM; + goto out; + } + + key.objectid = 0; + key.offset = 0; + key.type = 0; + + ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0); + if (ret < 0) + goto out; + /* Iterate all regular file extents and fill its csum */ + while (1) { + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + + if (key.type != BTRFS_EXTENT_DATA_KEY) + goto next; + node = path->nodes[0]; + slot = path->slots[0]; + fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item); + if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG) + goto next; + start = btrfs_file_extent_disk_bytenr(node, fi); + len = btrfs_file_extent_disk_num_bytes(node, fi); + + ret = populate_csum(trans, csum_root, buf, start, len); + if (ret == -EEXIST) + ret = 0; + if (ret < 0) + goto out; +next: + /* + * TODO: if next leaf is corrupted, jump to nearest next valid + * leaf. + */ + ret = btrfs_next_item(cur_root, path); + if (ret < 0) + goto out; + if (ret > 0) { + ret = 0; + goto out; + } + } + +out: + btrfs_free_path(path); + free(buf); + return ret; +} + +static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans, + struct btrfs_root *csum_root) +{ + struct btrfs_fs_info *fs_info = csum_root->fs_info; + struct btrfs_path *path; + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *cur_root; + struct extent_buffer *node; + struct btrfs_key key; + int slot = 0; + int ret = 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = BTRFS_FS_TREE_OBJECTID; + key.offset = 0; + key.type = BTRFS_ROOT_ITEM_KEY; + + ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); + if (ret < 0) + goto out; + if (ret > 0) { + ret = -ENOENT; + goto out; + } + + while (1) { + node = path->nodes[0]; + slot = path->slots[0]; + btrfs_item_key_to_cpu(node, &key, slot); + if (key.objectid > BTRFS_LAST_FREE_OBJECTID) + goto out; + if (key.type != BTRFS_ROOT_ITEM_KEY) + goto next; + if (!is_fstree(key.objectid)) + goto next; + key.offset = (u64)-1; + + cur_root = btrfs_read_fs_root(fs_info, &key); + if (IS_ERR(cur_root) || !cur_root) { + fprintf(stderr, "Fail to read fs/subvol tree: %lld\n", + key.objectid); + goto out; + } + ret = fill_csum_tree_from_one_fs_root(trans, csum_root, + cur_root); + if (ret < 0) + goto out; +next: + ret = btrfs_next_item(tree_root, path); + if (ret > 0) { + ret = 0; + goto out; + } + if (ret < 0) + goto out; + } + +out: + btrfs_free_path(path); + return ret; +} + +static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *csum_root) { struct btrfs_root *extent_root = csum_root->fs_info->extent_root; struct btrfs_path *path; @@ -8543,17 +10763,22 @@ static int fill_csum_tree(struct btrfs_trans_handle *trans, return ret; } -struct root_item_info { - /* level of the root */ - u8 level; - /* number of nodes at this level, must be 1 for a root */ - int node_count; - u64 bytenr; - u64 gen; - struct cache_extent cache_extent; -}; - -static struct cache_tree *roots_info_cache = NULL; +/* + * Recalculate the csum and put it into the csum tree. + * + * Extent tree init will wipe out all the extent info, so in that case, we + * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we + * will use fs/subvol trees to init the csum tree. + */ +static int fill_csum_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *csum_root, + int search_fs_tree) +{ + if (search_fs_tree) + return fill_csum_tree_from_fs(trans, csum_root); + else + return fill_csum_tree_from_extent(trans, csum_root); +} static void free_roots_info_cache(void) { @@ -8644,11 +10869,11 @@ static int build_roots_info_cache(struct btrfs_fs_info *info) iref = (struct btrfs_extent_inline_ref *)(ei + 1); level = found_key.offset; } else { - struct btrfs_tree_block_info *info; + struct btrfs_tree_block_info *binfo; - info = (struct btrfs_tree_block_info *)(ei + 1); - iref = (struct btrfs_extent_inline_ref *)(info + 1); - level = btrfs_tree_block_level(leaf, info); + binfo = (struct btrfs_tree_block_info *)(ei + 1); + iref = (struct btrfs_extent_inline_ref *)(binfo + 1); + level = btrfs_tree_block_level(leaf, binfo); } /* @@ -8856,6 +11081,8 @@ again: if (found_key.type != BTRFS_ROOT_ITEM_KEY) goto next; + if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID) + goto next; ret = maybe_repair_root_item(info, path, &found_key, trans ? 0 : 1); @@ -8876,8 +11103,9 @@ next: ret = 0; out: free_roots_info_cache(); - if (path) - btrfs_free_path(path); + btrfs_free_path(path); + if (trans) + btrfs_commit_transaction(trans, info->tree_root); if (ret < 0) return ret; @@ -8886,17 +11114,26 @@ out: const char * const cmd_check_usage[] = { "btrfs check [options] ", - "Check an unmounted btrfs filesystem.", + "Check structural integrity of a filesystem (unmounted).", + "Check structural integrity of an unmounted filesystem. Verify internal", + "trees' consistency and item connectivity. In the repair mode try to", + "fix the problems found.", + "WARNING: the repair mode is considered dangerous", "", "-s|--super use this superblock copy", - "-b|--backup use the backup root copy", + "-b|--backup use the first valid backup root copy", "--repair try to repair the filesystem", + "--readonly run in read-only mode (default)", "--init-csum-tree create a new CRC tree", "--init-extent-tree create a new extent tree", - "--check-data-csum verify checkums of data blocks", - "--qgroup-report print a report on qgroup consistency", - "--subvol-extents print subvolume extents and sharing state", - "--tree-root use the given bytenr for the tree root", + "--low-memory check in low memory usage mode(experimental)", + "--check-data-csum verify checksums of data blocks", + "-Q|--qgroup-report print a report on qgroup consistency", + "-E|--subvol-extents ", + " print subvolume extents and sharing state", + "-r|--tree-root use the given bytenr for the tree root", + "--chunk-root use the given bytenr for the chunk tree root", + "-p|--progress indicate progress", NULL }; @@ -8908,35 +11145,45 @@ int cmd_check(int argc, char **argv) u64 bytenr = 0; u64 subvolid = 0; u64 tree_root_bytenr = 0; + u64 chunk_root_bytenr = 0; char uuidbuf[BTRFS_UUID_UNPARSED_SIZE]; int ret; u64 num; int init_csum_tree = 0; int readonly = 0; int qgroup_report = 0; + int qgroups_repaired = 0; enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE; while(1) { int c; - int option_index = 0; - enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT, - OPT_CHECK_CSUM, OPT_READONLY }; + enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM, + GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM, + GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE, + GETOPT_VAL_LOW_MEMORY }; static const struct option long_options[] = { - { "super", 1, NULL, 's' }, - { "repair", 0, NULL, OPT_REPAIR }, - { "readonly", 0, NULL, OPT_READONLY }, - { "init-csum-tree", 0, NULL, OPT_INIT_CSUM }, - { "init-extent-tree", 0, NULL, OPT_INIT_EXTENT }, - { "check-data-csum", 0, NULL, OPT_CHECK_CSUM }, - { "backup", 0, NULL, 'b' }, - { "subvol-extents", 1, NULL, 'E' }, - { "qgroup-report", 0, NULL, 'Q' }, - { "tree-root", 1, NULL, 'r' }, + { "super", required_argument, NULL, 's' }, + { "repair", no_argument, NULL, GETOPT_VAL_REPAIR }, + { "readonly", no_argument, NULL, GETOPT_VAL_READONLY }, + { "init-csum-tree", no_argument, NULL, + GETOPT_VAL_INIT_CSUM }, + { "init-extent-tree", no_argument, NULL, + GETOPT_VAL_INIT_EXTENT }, + { "check-data-csum", no_argument, NULL, + GETOPT_VAL_CHECK_CSUM }, + { "backup", no_argument, NULL, 'b' }, + { "subvol-extents", required_argument, NULL, 'E' }, + { "qgroup-report", no_argument, NULL, 'Q' }, + { "tree-root", required_argument, NULL, 'r' }, + { "chunk-root", required_argument, NULL, + GETOPT_VAL_CHUNK_TREE }, + { "progress", no_argument, NULL, 'p' }, + { "low-memory", no_argument, NULL, + GETOPT_VAL_LOW_MEMORY }, { NULL, 0, NULL, 0} }; - c = getopt_long(argc, argv, "as:br:", long_options, - &option_index); + c = getopt_long(argc, argv, "as:br:p", long_options, NULL); if (c < 0) break; switch(c) { @@ -8965,45 +11212,66 @@ int cmd_check(int argc, char **argv) case 'r': tree_root_bytenr = arg_strtou64(optarg); break; + case GETOPT_VAL_CHUNK_TREE: + chunk_root_bytenr = arg_strtou64(optarg); + break; + case 'p': + ctx.progress_enabled = true; + break; case '?': case 'h': usage(cmd_check_usage); - case OPT_REPAIR: + case GETOPT_VAL_REPAIR: printf("enabling repair mode\n"); repair = 1; ctree_flags |= OPEN_CTREE_WRITES; break; - case OPT_READONLY: + case GETOPT_VAL_READONLY: readonly = 1; break; - case OPT_INIT_CSUM: + case GETOPT_VAL_INIT_CSUM: printf("Creating a new CRC tree\n"); init_csum_tree = 1; repair = 1; ctree_flags |= OPEN_CTREE_WRITES; break; - case OPT_INIT_EXTENT: + case GETOPT_VAL_INIT_EXTENT: init_extent_tree = 1; ctree_flags |= (OPEN_CTREE_WRITES | OPEN_CTREE_NO_BLOCK_GROUPS); repair = 1; break; - case OPT_CHECK_CSUM: + case GETOPT_VAL_CHECK_CSUM: check_data_csum = 1; break; + case GETOPT_VAL_LOW_MEMORY: + low_memory = 1; + break; } } - argc = argc - optind; - if (check_argc_exact(argc, 1)) + if (check_argc_exact(argc - optind, 1)) usage(cmd_check_usage); + if (ctx.progress_enabled) { + ctx.tp = TASK_NOTHING; + ctx.info = task_init(print_status_check, print_status_return, &ctx); + } + /* This check is the only reason for --readonly to exist */ if (readonly && repair) { fprintf(stderr, "Repair options are not compatible with --readonly\n"); exit(1); } + /* + * Not supported yet + */ + if (repair && low_memory) { + error("Low memory mode doesn't support repair yet"); + exit(1); + } + radix_tree_init(); cache_tree_init(&root_cache); @@ -9021,13 +11289,14 @@ int cmd_check(int argc, char **argv) ctree_flags |= OPEN_CTREE_PARTIAL; info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr, - ctree_flags); + chunk_root_bytenr, ctree_flags); if (!info) { fprintf(stderr, "Couldn't open file system\n"); ret = -EIO; goto err_out; } + global_info = info; root = info->fs_root; /* @@ -9053,7 +11322,7 @@ int cmd_check(int argc, char **argv) uuidbuf); ret = qgroup_verify_all(info); if (ret == 0) - print_qgroup_report(1); + report_qgroups(1); goto close_out; } if (subvolid) { @@ -9098,7 +11367,8 @@ int cmd_check(int argc, char **argv) goto close_out; } - ret = fill_csum_tree(trans, info->csum_root); + ret = fill_csum_tree(trans, info->csum_root, + init_extent_tree); if (ret) { fprintf(stderr, "crc refilling failed\n"); return -EIO; @@ -9123,8 +11393,12 @@ int cmd_check(int argc, char **argv) goto close_out; } - fprintf(stderr, "checking extents\n"); - ret = check_chunks_and_extents(root); + if (!ctx.progress_enabled) + fprintf(stderr, "checking extents\n"); + if (low_memory) + ret = check_chunks_and_extents_v2(root); + else + ret = check_chunks_and_extents(root); if (ret) fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n"); @@ -9144,7 +11418,12 @@ int cmd_check(int argc, char **argv) goto close_out; } - fprintf(stderr, "checking free space cache\n"); + if (!ctx.progress_enabled) { + if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) + fprintf(stderr, "checking free space tree\n"); + else + fprintf(stderr, "checking free space cache\n"); + } ret = check_space_cache(root); if (ret) goto out; @@ -9157,7 +11436,8 @@ int cmd_check(int argc, char **argv) */ no_holes = btrfs_fs_incompat(root->fs_info, BTRFS_FEATURE_INCOMPAT_NO_HOLES); - fprintf(stderr, "checking fs roots\n"); + if (!ctx.progress_enabled) + fprintf(stderr, "checking fs roots\n"); ret = check_fs_roots(root, &root_cache); if (ret) goto out; @@ -9199,6 +11479,10 @@ int cmd_check(int argc, char **argv) err = qgroup_verify_all(info); if (err) goto out; + report_qgroups(0); + err = repair_qgroups(info, &qgroups_repaired); + if (err) + goto out; } if (!list_empty(&root->fs_info->recow_ebs)) { @@ -9206,7 +11490,10 @@ int cmd_check(int argc, char **argv) ret = 1; } out: - print_qgroup_report(0); + /* Don't override original ret */ + if (!ret && qgroups_repaired) + ret = qgroups_repaired; + if (found_old_backref) { /* * there was a disk format change when mixed * backref was in testing tree. The old format @@ -9232,11 +11519,14 @@ out: printf("file data blocks allocated: %llu\n referenced %llu\n", (unsigned long long)data_bytes_allocated, (unsigned long long)data_bytes_referenced); - printf("%s\n", PACKAGE_STRING); + free_qgroup_counts(); free_root_recs_tree(&root_cache); close_out: close_ctree(root); err_out: + if (ctx.progress_enabled) + task_deinit(ctx.info); + return ret; }