X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=cmds-check.c;h=8c2ff3f9bae3a074b3563be16d9caa0bd1af556c;hb=ae5271dd52e36d8be04113f5f30b7764fdf1e9ca;hp=a2e8ebeb1be4df878722f24cf3bb823e177b27cd;hpb=2b7cdab42529bc4ed4c36a3659504e50f0ef700c;p=platform%2Fupstream%2Fbtrfs-progs.git diff --git a/cmds-check.c b/cmds-check.c index a2e8ebe..8c2ff3f 100644 --- a/cmds-check.c +++ b/cmds-check.c @@ -16,8 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#define _XOPEN_SOURCE 500 -#define _GNU_SOURCE 1 #include #include #include @@ -32,17 +30,32 @@ #include "repair.h" #include "disk-io.h" #include "print-tree.h" +#include "task-utils.h" #include "transaction.h" -#include "version.h" #include "utils.h" #include "commands.h" #include "free-space-cache.h" +#include "free-space-tree.h" #include "btrfsck.h" #include "qgroup-verify.h" #include "rbtree-utils.h" #include "backref.h" #include "ulist.h" +enum task_position { + TASK_EXTENTS, + TASK_FREE_SPACE, + TASK_FS_ROOTS, + TASK_NOTHING, /* have to be the last element */ +}; + +struct task_ctx { + int progress_enabled; + enum task_position tp; + + struct task_info *info; +}; + static u64 bytes_used = 0; static u64 total_csum_bytes = 0; static u64 total_btree_bytes = 0; @@ -54,13 +67,15 @@ static u64 data_bytes_referenced = 0; static int found_old_backref = 0; static LIST_HEAD(duplicate_extents); static LIST_HEAD(delete_items); -static int repair = 0; static int no_holes = 0; static int init_extent_tree = 0; static int check_data_csum = 0; +static struct btrfs_fs_info *global_info; +static struct task_ctx ctx = { 0 }; +static struct cache_tree *roots_info_cache = NULL; struct extent_backref { - struct list_head list; + struct rb_node node; unsigned int is_data:1; unsigned int found_extent_tree:1; unsigned int full_backref:1; @@ -68,6 +83,11 @@ struct extent_backref { unsigned int broken:1; }; +static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node) +{ + return rb_entry(node, struct extent_backref, node); +} + struct data_backref { struct extent_backref node; union { @@ -83,6 +103,76 @@ struct data_backref { u32 found_ref; }; +static inline struct data_backref* to_data_backref(struct extent_backref *back) +{ + return container_of(back, struct data_backref, node); +} + +static int compare_data_backref(struct rb_node *node1, struct rb_node *node2) +{ + struct extent_backref *ext1 = rb_node_to_extent_backref(node1); + struct extent_backref *ext2 = rb_node_to_extent_backref(node2); + struct data_backref *back1 = to_data_backref(ext1); + struct data_backref *back2 = to_data_backref(ext2); + + WARN_ON(!ext1->is_data); + WARN_ON(!ext2->is_data); + + /* parent and root are a union, so this covers both */ + if (back1->parent > back2->parent) + return 1; + if (back1->parent < back2->parent) + return -1; + + /* This is a full backref and the parents match. */ + if (back1->node.full_backref) + return 0; + + if (back1->owner > back2->owner) + return 1; + if (back1->owner < back2->owner) + return -1; + + if (back1->offset > back2->offset) + return 1; + if (back1->offset < back2->offset) + return -1; + + if (back1->bytes > back2->bytes) + return 1; + if (back1->bytes < back2->bytes) + return -1; + + if (back1->found_ref && back2->found_ref) { + if (back1->disk_bytenr > back2->disk_bytenr) + return 1; + if (back1->disk_bytenr < back2->disk_bytenr) + return -1; + + if (back1->found_ref > back2->found_ref) + return 1; + if (back1->found_ref < back2->found_ref) + return -1; + } + + return 0; +} + +/* + * Much like data_backref, just removed the undetermined members + * and change it to use list_head. + * During extent scan, it is stored in root->orphan_data_extent. + * During fs tree scan, it is then moved to inode_rec->orphan_data_extents. + */ +struct orphan_data_extent { + struct list_head list; + u64 root; + u64 objectid; + u64 offset; + u64 disk_bytenr; + u64 disk_len; +}; + struct tree_backref { struct extent_backref node; union { @@ -91,9 +181,59 @@ struct tree_backref { }; }; +static inline struct tree_backref* to_tree_backref(struct extent_backref *back) +{ + return container_of(back, struct tree_backref, node); +} + +static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2) +{ + struct extent_backref *ext1 = rb_node_to_extent_backref(node1); + struct extent_backref *ext2 = rb_node_to_extent_backref(node2); + struct tree_backref *back1 = to_tree_backref(ext1); + struct tree_backref *back2 = to_tree_backref(ext2); + + WARN_ON(ext1->is_data); + WARN_ON(ext2->is_data); + + /* parent and root are a union, so this covers both */ + if (back1->parent > back2->parent) + return 1; + if (back1->parent < back2->parent) + return -1; + + return 0; +} + +static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2) +{ + struct extent_backref *ext1 = rb_node_to_extent_backref(node1); + struct extent_backref *ext2 = rb_node_to_extent_backref(node2); + + if (ext1->is_data > ext2->is_data) + return 1; + + if (ext1->is_data < ext2->is_data) + return -1; + + if (ext1->full_backref > ext2->full_backref) + return 1; + if (ext1->full_backref < ext2->full_backref) + return -1; + + if (ext1->is_data) + return compare_data_backref(node1, node2); + else + return compare_tree_backref(node1, node2); +} + +/* Explicit initialization for extent_record::flag_block_full_backref */ +enum { FLAG_UNSET = 2 }; + struct extent_record { struct list_head backrefs; struct list_head dups; + struct rb_root backref_tree; struct list_head list; struct cache_extent cache; struct btrfs_disk_key parent_key; @@ -107,14 +247,22 @@ struct extent_record { u64 info_objectid; u32 num_duplicates; u8 info_level; + unsigned int flag_block_full_backref:2; unsigned int found_rec:1; unsigned int content_checked:1; unsigned int owner_ref_checked:1; unsigned int is_root:1; unsigned int metadata:1; - unsigned int flag_block_full_backref:1; + unsigned int bad_full_backref:1; + unsigned int crossing_stripes:1; + unsigned int wrong_chunk_type:1; }; +static inline struct extent_record* to_extent_record(struct list_head *entry) +{ + return container_of(entry, struct extent_record, list); +} + struct inode_backref { struct list_head list; unsigned int found_dir_item:1; @@ -129,10 +277,16 @@ struct inode_backref { char name[0]; }; +static inline struct inode_backref* to_inode_backref(struct list_head *entry) +{ + return list_entry(entry, struct inode_backref, list); +} + struct root_item_record { struct list_head list; u64 objectid; u64 bytenr; + u64 last_snapshot; u8 level; u8 drop_level; int level_size; @@ -153,6 +307,12 @@ struct root_item_record { #define REF_ERR_DUP_ROOT_REF (1 << 11) #define REF_ERR_DUP_ROOT_BACKREF (1 << 12) +struct file_extent_hole { + struct rb_node node; + u64 start; + u64 len; +}; + struct inode_record { struct list_head backrefs; unsigned int checked:1; @@ -175,7 +335,8 @@ struct inode_record { u64 found_size; u64 extent_start; u64 extent_end; - u64 first_extent_gap; + struct rb_root holes; + struct list_head orphan_extents; u32 refs; }; @@ -194,6 +355,7 @@ struct inode_record { #define I_ERR_ODD_CSUM_ITEM (1 << 11) #define I_ERR_SOME_CSUM_MISSING (1 << 12) #define I_ERR_LINK_COUNT_WRONG (1 << 13) +#define I_ERR_FILE_EXTENT_ORPHAN (1 << 14) struct root_backref { struct list_head list; @@ -210,6 +372,11 @@ struct root_backref { char name[0]; }; +static inline struct root_backref* to_root_backref(struct list_head *entry) +{ + return list_entry(entry, struct root_backref, list); +} + struct root_record { struct list_head backrefs; struct cache_extent cache; @@ -249,6 +416,260 @@ struct bad_item { struct list_head list; }; +struct extent_entry { + u64 bytenr; + u64 bytes; + int count; + int broken; + struct list_head list; +}; + +struct root_item_info { + /* level of the root */ + u8 level; + /* number of nodes at this level, must be 1 for a root */ + int node_count; + u64 bytenr; + u64 gen; + struct cache_extent cache_extent; +}; + +/* + * Error bit for low memory mode check. + * + * Currently no caller cares about it yet. Just internal use for error + * classification. + */ +#define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */ +#define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */ +#define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */ + +static void *print_status_check(void *p) +{ + struct task_ctx *priv = p; + const char work_indicator[] = { '.', 'o', 'O', 'o' }; + uint32_t count = 0; + static char *task_position_string[] = { + "checking extents", + "checking free space cache", + "checking fs roots", + }; + + task_period_start(priv->info, 1000 /* 1s */); + + if (priv->tp == TASK_NOTHING) + return NULL; + + while (1) { + printf("%s [%c]\r", task_position_string[priv->tp], + work_indicator[count % 4]); + count++; + fflush(stdout); + task_period_wait(priv->info); + } + return NULL; +} + +static int print_status_return(void *p) +{ + printf("\n"); + fflush(stdout); + + return 0; +} + +/* Compatible function to allow reuse of old codes */ +static u64 first_extent_gap(struct rb_root *holes) +{ + struct file_extent_hole *hole; + + if (RB_EMPTY_ROOT(holes)) + return (u64)-1; + + hole = rb_entry(rb_first(holes), struct file_extent_hole, node); + return hole->start; +} + +static int compare_hole(struct rb_node *node1, struct rb_node *node2) +{ + struct file_extent_hole *hole1; + struct file_extent_hole *hole2; + + hole1 = rb_entry(node1, struct file_extent_hole, node); + hole2 = rb_entry(node2, struct file_extent_hole, node); + + if (hole1->start > hole2->start) + return -1; + if (hole1->start < hole2->start) + return 1; + /* Now hole1->start == hole2->start */ + if (hole1->len >= hole2->len) + /* + * Hole 1 will be merge center + * Same hole will be merged later + */ + return -1; + /* Hole 2 will be merge center */ + return 1; +} + +/* + * Add a hole to the record + * + * This will do hole merge for copy_file_extent_holes(), + * which will ensure there won't be continuous holes. + */ +static int add_file_extent_hole(struct rb_root *holes, + u64 start, u64 len) +{ + struct file_extent_hole *hole; + struct file_extent_hole *prev = NULL; + struct file_extent_hole *next = NULL; + + hole = malloc(sizeof(*hole)); + if (!hole) + return -ENOMEM; + hole->start = start; + hole->len = len; + /* Since compare will not return 0, no -EEXIST will happen */ + rb_insert(holes, &hole->node, compare_hole); + + /* simple merge with previous hole */ + if (rb_prev(&hole->node)) + prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole, + node); + if (prev && prev->start + prev->len >= hole->start) { + hole->len = hole->start + hole->len - prev->start; + hole->start = prev->start; + rb_erase(&prev->node, holes); + free(prev); + prev = NULL; + } + + /* iterate merge with next holes */ + while (1) { + if (!rb_next(&hole->node)) + break; + next = rb_entry(rb_next(&hole->node), struct file_extent_hole, + node); + if (hole->start + hole->len >= next->start) { + if (hole->start + hole->len <= next->start + next->len) + hole->len = next->start + next->len - + hole->start; + rb_erase(&next->node, holes); + free(next); + next = NULL; + } else + break; + } + return 0; +} + +static int compare_hole_range(struct rb_node *node, void *data) +{ + struct file_extent_hole *hole; + u64 start; + + hole = (struct file_extent_hole *)data; + start = hole->start; + + hole = rb_entry(node, struct file_extent_hole, node); + if (start < hole->start) + return -1; + if (start >= hole->start && start < hole->start + hole->len) + return 0; + return 1; +} + +/* + * Delete a hole in the record + * + * This will do the hole split and is much restrict than add. + */ +static int del_file_extent_hole(struct rb_root *holes, + u64 start, u64 len) +{ + struct file_extent_hole *hole; + struct file_extent_hole tmp; + u64 prev_start = 0; + u64 prev_len = 0; + u64 next_start = 0; + u64 next_len = 0; + struct rb_node *node; + int have_prev = 0; + int have_next = 0; + int ret = 0; + + tmp.start = start; + tmp.len = len; + node = rb_search(holes, &tmp, compare_hole_range, NULL); + if (!node) + return -EEXIST; + hole = rb_entry(node, struct file_extent_hole, node); + if (start + len > hole->start + hole->len) + return -EEXIST; + + /* + * Now there will be no overlap, delete the hole and re-add the + * split(s) if they exists. + */ + if (start > hole->start) { + prev_start = hole->start; + prev_len = start - hole->start; + have_prev = 1; + } + if (hole->start + hole->len > start + len) { + next_start = start + len; + next_len = hole->start + hole->len - start - len; + have_next = 1; + } + rb_erase(node, holes); + free(hole); + if (have_prev) { + ret = add_file_extent_hole(holes, prev_start, prev_len); + if (ret < 0) + return ret; + } + if (have_next) { + ret = add_file_extent_hole(holes, next_start, next_len); + if (ret < 0) + return ret; + } + return 0; +} + +static int copy_file_extent_holes(struct rb_root *dst, + struct rb_root *src) +{ + struct file_extent_hole *hole; + struct rb_node *node; + int ret = 0; + + node = rb_first(src); + while (node) { + hole = rb_entry(node, struct file_extent_hole, node); + ret = add_file_extent_hole(dst, hole->start, hole->len); + if (ret) + break; + node = rb_next(node); + } + return ret; +} + +static void free_file_extent_holes(struct rb_root *holes) +{ + struct rb_node *node; + struct file_extent_hole *hole; + + node = rb_first(holes); + while (node) { + hole = rb_entry(node, struct file_extent_hole, node); + rb_erase(node, holes); + free(hole); + node = rb_first(holes); + } +} + static void reset_cached_block_groups(struct btrfs_fs_info *fs_info); static void record_root_in_trans(struct btrfs_trans_handle *trans, @@ -299,20 +720,77 @@ static struct inode_record *clone_inode_rec(struct inode_record *orig_rec) struct inode_record *rec; struct inode_backref *backref; struct inode_backref *orig; + struct inode_backref *tmp; + struct orphan_data_extent *src_orphan; + struct orphan_data_extent *dst_orphan; size_t size; + int ret; rec = malloc(sizeof(*rec)); + if (!rec) + return ERR_PTR(-ENOMEM); memcpy(rec, orig_rec, sizeof(*rec)); rec->refs = 1; INIT_LIST_HEAD(&rec->backrefs); + INIT_LIST_HEAD(&rec->orphan_extents); + rec->holes = RB_ROOT; list_for_each_entry(orig, &orig_rec->backrefs, list) { size = sizeof(*orig) + orig->namelen + 1; backref = malloc(size); + if (!backref) { + ret = -ENOMEM; + goto cleanup; + } memcpy(backref, orig, size); list_add_tail(&backref->list, &rec->backrefs); } + list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) { + dst_orphan = malloc(sizeof(*dst_orphan)); + if (!dst_orphan) { + ret = -ENOMEM; + goto cleanup; + } + memcpy(dst_orphan, src_orphan, sizeof(*src_orphan)); + list_add_tail(&dst_orphan->list, &rec->orphan_extents); + } + ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes); + BUG_ON(ret < 0); + return rec; + +cleanup: + if (!list_empty(&rec->backrefs)) + list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) { + list_del(&orig->list); + free(orig); + } + + if (!list_empty(&rec->orphan_extents)) + list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) { + list_del(&orig->list); + free(orig); + } + + free(rec); + + return ERR_PTR(ret); +} + +static void print_orphan_data_extents(struct list_head *orphan_extents, + u64 objectid) +{ + struct orphan_data_extent *orphan; + + if (list_empty(orphan_extents)) + return; + printf("The following data extent is lost in tree %llu:\n", + objectid); + list_for_each_entry(orphan, orphan_extents, list) { + printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n", + orphan->objectid, orphan->offset, orphan->disk_bytenr, + orphan->disk_len); + } } static void print_inode_error(struct btrfs_root *root, struct inode_record *rec) @@ -359,7 +837,32 @@ static void print_inode_error(struct btrfs_root *root, struct inode_record *rec) fprintf(stderr, ", some csum missing"); if (errors & I_ERR_LINK_COUNT_WRONG) fprintf(stderr, ", link count wrong"); + if (errors & I_ERR_FILE_EXTENT_ORPHAN) + fprintf(stderr, ", orphan file extent"); fprintf(stderr, "\n"); + /* Print the orphan extents if needed */ + if (errors & I_ERR_FILE_EXTENT_ORPHAN) + print_orphan_data_extents(&rec->orphan_extents, root->objectid); + + /* Print the holes if needed */ + if (errors & I_ERR_FILE_EXTENT_DISCOUNT) { + struct file_extent_hole *hole; + struct rb_node *node; + int found = 0; + + node = rb_first(&rec->holes); + fprintf(stderr, "Found file extent holes:\n"); + while (node) { + found = 1; + hole = rb_entry(node, struct file_extent_hole, node); + fprintf(stderr, "\tstart: %llu, len: %llu\n", + hole->start, hole->len); + node = rb_next(node); + } + if (!found) + fprintf(stderr, "\tstart: 0, len: %llu\n", + round_up(rec->isize, root->sectorsize)); + } } static void print_ref_error(int errors) @@ -377,9 +880,9 @@ static void print_ref_error(int errors) if (errors & REF_ERR_DUP_INODE_REF) fprintf(stderr, ", dup inode ref"); if (errors & REF_ERR_INDEX_UNMATCH) - fprintf(stderr, ", index unmatch"); + fprintf(stderr, ", index mismatch"); if (errors & REF_ERR_FILETYPE_UNMATCH) - fprintf(stderr, ", filetype unmatch"); + fprintf(stderr, ", filetype mismatch"); if (errors & REF_ERR_NAME_TOO_LONG) fprintf(stderr, ", name too long"); if (errors & REF_ERR_NO_ROOT_REF) @@ -407,18 +910,27 @@ static struct inode_record *get_inode_rec(struct cache_tree *inode_cache, rec = node->data; if (mod && rec->refs > 1) { node->data = clone_inode_rec(rec); + if (IS_ERR(node->data)) + return node->data; rec->refs--; rec = node->data; } } else if (mod) { rec = calloc(1, sizeof(*rec)); + if (!rec) + return ERR_PTR(-ENOMEM); rec->ino = ino; rec->extent_start = (u64)-1; - rec->first_extent_gap = (u64)-1; rec->refs = 1; INIT_LIST_HEAD(&rec->backrefs); + INIT_LIST_HEAD(&rec->orphan_extents); + rec->holes = RB_ROOT; node = malloc(sizeof(*node)); + if (!node) { + free(rec); + return ERR_PTR(-ENOMEM); + } node->cache.start = ino; node->cache.size = 1; node->data = rec; @@ -427,11 +939,24 @@ static struct inode_record *get_inode_rec(struct cache_tree *inode_cache, rec->found_link = 1; ret = insert_cache_extent(inode_cache, &node->cache); - BUG_ON(ret); + if (ret) + return ERR_PTR(-EEXIST); } return rec; } +static void free_orphan_data_extents(struct list_head *orphan_extents) +{ + struct orphan_data_extent *orphan; + + while (!list_empty(orphan_extents)) { + orphan = list_entry(orphan_extents->next, + struct orphan_data_extent, list); + list_del(&orphan->list); + free(orphan); + } +} + static void free_inode_rec(struct inode_record *rec) { struct inode_backref *backref; @@ -440,11 +965,12 @@ static void free_inode_rec(struct inode_record *rec) return; while (!list_empty(&rec->backrefs)) { - backref = list_entry(rec->backrefs.next, - struct inode_backref, list); + backref = to_inode_backref(rec->backrefs.next); list_del(&backref->list); free(backref); } + free_orphan_data_extents(&rec->orphan_extents); + free_file_extent_holes(&rec->holes); free(rec); } @@ -472,7 +998,8 @@ static void maybe_free_inode_rec(struct cache_tree *inode_cache, if (backref->found_dir_item && backref->found_dir_index) { if (backref->filetype != filetype) backref->errors |= REF_ERR_FILETYPE_UNMATCH; - if (!backref->errors && backref->found_inode_ref) { + if (!backref->errors && backref->found_inode_ref && + rec->nlink == rec->found_link) { list_del(&backref->list); free(backref); } @@ -492,11 +1019,9 @@ static void maybe_free_inode_rec(struct cache_tree *inode_cache, rec->errors |= I_ERR_ODD_DIR_ITEM; if (rec->found_size != rec->nbytes) rec->errors |= I_ERR_FILE_NBYTES_WRONG; - if (rec->extent_start == (u64)-1 || rec->extent_start > 0) - rec->first_extent_gap = 0; if (rec->nlink > 0 && !no_holes && (rec->extent_end < rec->isize || - rec->first_extent_gap < rec->isize)) + first_extent_gap(&rec->holes) < rec->isize)) rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT; } @@ -580,6 +1105,8 @@ static struct inode_backref *get_inode_backref(struct inode_record *rec, } backref = malloc(sizeof(*backref) + namelen + 1); + if (!backref) + return NULL; memset(backref, 0, sizeof(*backref)); backref->dir = dir; backref->namelen = namelen; @@ -598,7 +1125,9 @@ static int add_inode_backref(struct cache_tree *inode_cache, struct inode_backref *backref; rec = get_inode_rec(inode_cache, ino, 1); + BUG_ON(IS_ERR(rec)); backref = get_inode_backref(rec, name, namelen, dir); + BUG_ON(!backref); if (errors) backref->errors |= errors; if (itemtype == BTRFS_DIR_INDEX_KEY) { @@ -645,6 +1174,7 @@ static int merge_inode_recs(struct inode_record *src, struct inode_record *dst, { struct inode_backref *backref; u32 dir_count = 0; + int ret = 0; dst->merging = 1; list_for_each_entry(backref, &src->backrefs, list) { @@ -677,8 +1207,11 @@ static int merge_inode_recs(struct inode_record *src, struct inode_record *dst, dst->found_csum_item = 1; if (src->some_csum_missing) dst->some_csum_missing = 1; - if (dst->first_extent_gap > src->first_extent_gap) - dst->first_extent_gap = src->first_extent_gap; + if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) { + ret = copy_file_extent_holes(&dst->holes, &src->holes); + if (ret < 0) + return ret; + } BUG_ON(src->found_link < dir_count); dst->found_link += src->found_link - dir_count; @@ -690,9 +1223,11 @@ static int merge_inode_recs(struct inode_record *src, struct inode_record *dst, } else { if (dst->extent_end > src->extent_start) dst->errors |= I_ERR_FILE_EXTENT_OVERLAP; - else if (dst->extent_end < src->extent_start && - dst->extent_end < dst->first_extent_gap) - dst->first_extent_gap = dst->extent_end; + else if (dst->extent_end < src->extent_start) { + ret = add_file_extent_hole(&dst->holes, + dst->extent_end, + src->extent_start - dst->extent_end); + } if (dst->extent_end < src->extent_end) dst->extent_end = src->extent_end; } @@ -746,6 +1281,7 @@ again: ins = node; } else { ins = malloc(sizeof(*ins)); + BUG_ON(!ins); ins->cache.start = node->cache.start; ins->cache.size = node->cache.size; ins->data = rec; @@ -754,6 +1290,7 @@ again: ret = insert_cache_extent(dst, &ins->cache); if (ret == -EEXIST) { conflict = get_inode_rec(dst, rec->ino, 1); + BUG_ON(IS_ERR(conflict)); merge_inode_recs(rec, conflict, dst); if (rec->checked) { conflict->checked = 1; @@ -781,6 +1318,7 @@ again: maybe_free_inode_rec(dst, dst_node->current); } dst_node->current = get_inode_rec(dst, current_ino, 1); + BUG_ON(IS_ERR(dst_node->current)); } return 0; } @@ -818,6 +1356,8 @@ static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs) struct shared_node *node; node = calloc(1, sizeof(*node)); + if (!node) + return -ENOMEM; node->cache.start = bytenr; node->cache.size = 1; cache_tree_init(&node->root_cache); @@ -825,8 +1365,8 @@ static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs) node->refs = refs; ret = insert_cache_extent(shared, &node->cache); - BUG_ON(ret); - return 0; + + return ret; } static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs, @@ -834,6 +1374,7 @@ static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs, { struct shared_node *node; struct shared_node *dest; + int ret; if (level == wc->active_node) return 0; @@ -841,7 +1382,8 @@ static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs, BUG_ON(wc->active_node <= level); node = find_shared_node(&wc->shared, bytenr); if (!node) { - add_shared_node(&wc->shared, bytenr, refs); + ret = add_shared_node(&wc->shared, bytenr, refs); + BUG_ON(ret); node = find_shared_node(&wc->shared, bytenr); wc->nodes[level] = node; wc->active_node = level; @@ -1217,9 +1759,12 @@ static int process_file_extent(struct btrfs_root *root, if (rec->extent_end > key->offset) rec->errors |= I_ERR_FILE_EXTENT_OVERLAP; - else if (rec->extent_end < key->offset && - rec->extent_end < rec->first_extent_gap) - rec->first_extent_gap = rec->extent_end; + else if (rec->extent_end < key->offset) { + ret = add_file_extent_hole(&rec->holes, rec->extent_end, + key->offset - rec->extent_end); + if (ret < 0) + return ret; + } fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); extent_type = btrfs_file_extent_type(eb, fi); @@ -1252,7 +1797,16 @@ static int process_file_extent(struct btrfs_root *root, } rec->extent_end = key->offset + num_bytes; - if (disk_bytenr > 0) { + /* + * The data reloc tree will copy full extents into its inode and then + * copy the corresponding csums. Because the extent it copied could be + * a preallocated extent that hasn't been written to yet there may be no + * csums to copy, ergo we won't have csums for our file extent. This is + * ok so just don't bother checking csums if the inode belongs to the + * data reloc tree. + */ + if (disk_bytenr > 0 && + btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) { u64 found; if (btrfs_file_extent_compression(eb, fi)) num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); @@ -1309,6 +1863,7 @@ static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb, } active_node->current = get_inode_rec(inode_cache, key.objectid, 1); + BUG_ON(IS_ERR(active_node->current)); } switch (key.type) { case BTRFS_DIR_ITEM_KEY: @@ -1350,7 +1905,7 @@ static void reada_walk_down(struct btrfs_root *root, return; nritems = btrfs_header_nritems(node); - blocksize = btrfs_level_size(root, level - 1); + blocksize = root->nodesize; for (i = slot; i < nritems; i++) { bytenr = btrfs_node_blockptr(node, i); ptr_gen = btrfs_node_ptr_generation(node, i); @@ -1408,8 +1963,14 @@ static int check_child_node(struct btrfs_root *root, return ret; } +struct node_refs { + u64 bytenr[BTRFS_MAX_LEVEL]; + u64 refs[BTRFS_MAX_LEVEL]; +}; + static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, - struct walk_control *wc, int *level) + struct walk_control *wc, int *level, + struct node_refs *nrefs) { enum btrfs_tree_block_status status; u64 bytenr; @@ -1422,12 +1983,20 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); - ret = btrfs_lookup_extent_info(NULL, root, + + if (path->nodes[*level]->start == nrefs->bytenr[*level]) { + refs = nrefs->refs[*level]; + ret = 0; + } else { + ret = btrfs_lookup_extent_info(NULL, root, path->nodes[*level]->start, *level, 1, &refs, NULL); - if (ret < 0) { - err = ret; - goto out; + if (ret < 0) { + err = ret; + goto out; + } + nrefs->bytenr[*level] = path->nodes[*level]->start; + nrefs->refs[*level] = refs; } if (refs > 1) { @@ -1457,11 +2026,20 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, } bytenr = btrfs_node_blockptr(cur, path->slots[*level]); ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); - blocksize = btrfs_level_size(root, *level - 1); - ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1, - 1, &refs, NULL); - if (ret < 0) - refs = 0; + blocksize = root->nodesize; + + if (bytenr == nrefs->bytenr[*level - 1]) { + refs = nrefs->refs[*level - 1]; + } else { + ret = btrfs_lookup_extent_info(NULL, root, bytenr, + *level - 1, 1, &refs, NULL); + if (ret < 0) { + refs = 0; + } else { + nrefs->bytenr[*level - 1] = bytenr; + nrefs->refs[*level - 1] = refs; + } + } if (refs > 1) { ret = enter_shared_node(root, bytenr, refs, @@ -1478,7 +2056,7 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, reada_walk_down(root, cur, path->slots[*level]); next = read_tree_block(root, bytenr, blocksize, ptr_gen); - if (!next) { + if (!extent_buffer_uptodate(next)) { struct btrfs_key node_key; btrfs_node_key_to_cpu(path->nodes[*level], @@ -1487,7 +2065,7 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, btrfs_add_corrupt_extent_record(root->fs_info, &node_key, path->nodes[*level]->start, - root->leafsize, *level); + root->nodesize, *level); err = -EIO; goto out; } @@ -1554,7 +2132,7 @@ static int check_root_dir(struct inode_record *rec) goto out; if (list_empty(&rec->backrefs)) goto out; - backref = list_entry(rec->backrefs.next, struct inode_backref, list); + backref = to_inode_backref(rec->backrefs.next); if (!backref->found_inode_ref) goto out; if (backref->index != 0 || backref->namelen != 2 || @@ -1622,8 +2200,41 @@ static int repair_inode_orphan_item(struct btrfs_trans_handle *trans, return ret; } -static int add_missing_dir_index(struct btrfs_root *root, - struct cache_tree *inode_cache, +static int repair_inode_nbytes(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct inode_record *rec) +{ + struct btrfs_inode_item *ei; + struct btrfs_key key; + int ret = 0; + + key.objectid = rec->ino; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret) { + if (ret > 0) + ret = -ENOENT; + goto out; + } + + /* Since ret == 0, no need to check anything */ + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size); + btrfs_mark_buffer_dirty(path->nodes[0]); + rec->errors &= ~I_ERR_FILE_NBYTES_WRONG; + printf("reset nbytes for ino %llu root %llu\n", + rec->ino, root->root_key.objectid); +out: + btrfs_release_path(path); + return ret; +} + +static int add_missing_dir_index(struct btrfs_root *root, + struct cache_tree *inode_cache, struct inode_record *rec, struct inode_backref *backref) { @@ -1676,6 +2287,7 @@ static int add_missing_dir_index(struct btrfs_root *root, backref->found_dir_index = 1; dir_rec = get_inode_rec(inode_cache, backref->dir, 0); + BUG_ON(IS_ERR(dir_rec)); if (!dir_rec) return 0; dir_rec->found_size += backref->namelen; @@ -2005,7 +2617,7 @@ static int reset_nlink(struct btrfs_trans_handle *trans, list_for_each_entry(backref, &rec->backrefs, list) { ret = btrfs_add_link(trans, root, rec->ino, backref->dir, backref->name, backref->namelen, - backref->ref_type, &backref->index, 1); + backref->filetype, &backref->index, 1); if (ret < 0) goto out; } @@ -2069,13 +2681,17 @@ static int repair_inode_nlinks(struct btrfs_trans_handle *trans, BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino, mode); if (ret < 0) { - fprintf(stderr, "Failed to create '%s' dir: %s", + fprintf(stderr, "Failed to create '%s' dir: %s\n", dir_name, strerror(-ret)); goto out; } ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino, namebuf, namelen, type, NULL, 1); - if (ret == -EEXIST) { + /* + * Add ".INO" suffix several times to handle case where + * "FILENAME.INO" is already taken by another file. + */ + while (ret == -EEXIST) { /* * Conflicting file name, add ".INO" as suffix * +1 for '.' */ @@ -2093,7 +2709,7 @@ static int repair_inode_nlinks(struct btrfs_trans_handle *trans, } if (ret < 0) { fprintf(stderr, - "Failed to link the inode %llu to %s dir: %s", + "Failed to link the inode %llu to %s dir: %s\n", rec->ino, dir_name, strerror(-ret)); goto out; } @@ -2107,9 +2723,14 @@ static int repair_inode_nlinks(struct btrfs_trans_handle *trans, printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n", namelen, namebuf, dir_name); } - rec->errors &= ~I_ERR_LINK_COUNT_WRONG; printf("Fixed the nlink of inode %llu\n", rec->ino); out: + /* + * Clear the flag anyway, or we will loop forever for the same inode + * as it will not be removed from the bad inode list and the dead loop + * happens. + */ + rec->errors &= ~I_ERR_LINK_COUNT_WRONG; btrfs_release_path(path); return ret; } @@ -2195,12 +2816,6 @@ static int repair_inode_no_item(struct btrfs_trans_handle *trans, int type_recovered = 0; int ret = 0; - /* - * TODO: - * 1. salvage data from existing file extent and - * punch hole to keep fi ext consistent. - * 2. salvage data from extent tree - */ printf("Trying to rebuild inode:%llu\n", rec->ino); type_recovered = !find_file_type(rec, &filetype); @@ -2214,9 +2829,7 @@ static int repair_inode_no_item(struct btrfs_trans_handle *trans, * For undetermined one, use FILE as fallback. * * TODO: - * 1. If found extent belong to it in extent tree, it must be FILE - * Need extra hook in extent tree scan. - * 2. If found backref(inode_index/item is already handled) to it, + * 1. If found backref(inode_index/item is already handled) to it, * it must be DIR. * Need new inode-inode ref structure to allow search for that. */ @@ -2228,8 +2841,11 @@ static int repair_inode_no_item(struct btrfs_trans_handle *trans, } else if (rec->found_dir_item) { type_recovered = 1; filetype = BTRFS_FT_DIR; - } else { - printf("Can't determint the filetype for inode %llu, assume it is a normal file\n", + } else if (!list_empty(&rec->orphan_extents)) { + type_recovered = 1; + filetype = BTRFS_FT_REG_FILE; + } else{ + printf("Can't determine the filetype for inode %llu, assume it is a normal file\n", rec->ino); type_recovered = 1; filetype = BTRFS_FT_REG_FILE; @@ -2258,6 +2874,107 @@ out: return ret; } +static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct inode_record *rec) +{ + struct orphan_data_extent *orphan; + struct orphan_data_extent *tmp; + int ret = 0; + + list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) { + /* + * Check for conflicting file extents + * + * Here we don't know whether the extents is compressed or not, + * so we can only assume it not compressed nor data offset, + * and use its disk_len as extent length. + */ + ret = btrfs_get_extent(NULL, root, path, orphan->objectid, + orphan->offset, orphan->disk_len, 0); + btrfs_release_path(path); + if (ret < 0) + goto out; + if (!ret) { + fprintf(stderr, + "orphan extent (%llu, %llu) conflicts, delete the orphan\n", + orphan->disk_bytenr, orphan->disk_len); + ret = btrfs_free_extent(trans, + root->fs_info->extent_root, + orphan->disk_bytenr, orphan->disk_len, + 0, root->objectid, orphan->objectid, + orphan->offset); + if (ret < 0) + goto out; + } + ret = btrfs_insert_file_extent(trans, root, orphan->objectid, + orphan->offset, orphan->disk_bytenr, + orphan->disk_len, orphan->disk_len); + if (ret < 0) + goto out; + + /* Update file size info */ + rec->found_size += orphan->disk_len; + if (rec->found_size == rec->nbytes) + rec->errors &= ~I_ERR_FILE_NBYTES_WRONG; + + /* Update the file extent hole info too */ + ret = del_file_extent_hole(&rec->holes, orphan->offset, + orphan->disk_len); + if (ret < 0) + goto out; + if (RB_EMPTY_ROOT(&rec->holes)) + rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT; + + list_del(&orphan->list); + free(orphan); + } + rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN; +out: + return ret; +} + +static int repair_inode_discount_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct inode_record *rec) +{ + struct rb_node *node; + struct file_extent_hole *hole; + int found = 0; + int ret = 0; + + node = rb_first(&rec->holes); + + while (node) { + found = 1; + hole = rb_entry(node, struct file_extent_hole, node); + ret = btrfs_punch_hole(trans, root, rec->ino, + hole->start, hole->len); + if (ret < 0) + goto out; + ret = del_file_extent_hole(&rec->holes, hole->start, + hole->len); + if (ret < 0) + goto out; + if (RB_EMPTY_ROOT(&rec->holes)) + rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT; + node = rb_first(&rec->holes); + } + /* special case for a file losing all its file extent */ + if (!found) { + ret = btrfs_punch_hole(trans, root, rec->ino, 0, + round_up(rec->isize, root->sectorsize)); + if (ret < 0) + goto out; + } + printf("Fixed discount file extents for inode: %llu in root: %llu\n", + rec->ino, root->objectid); +out: + return ret; +} + static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec) { struct btrfs_trans_handle *trans; @@ -2267,7 +2984,10 @@ static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec) if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG | I_ERR_NO_ORPHAN_ITEM | I_ERR_LINK_COUNT_WRONG | - I_ERR_NO_INODE_ITEM))) + I_ERR_NO_INODE_ITEM | + I_ERR_FILE_EXTENT_ORPHAN | + I_ERR_FILE_EXTENT_DISCOUNT| + I_ERR_FILE_NBYTES_WRONG))) return rec->errors; path = btrfs_alloc_path(); @@ -2289,12 +3009,18 @@ static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec) if (rec->errors & I_ERR_NO_INODE_ITEM) ret = repair_inode_no_item(trans, root, path, rec); + if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN) + ret = repair_inode_orphan_extent(trans, root, path, rec); + if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT) + ret = repair_inode_discount_extent(trans, root, path, rec); if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG) ret = repair_inode_isize(trans, root, path, rec); if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM) ret = repair_inode_orphan_item(trans, root, path, rec); if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG) ret = repair_inode_nlinks(trans, root, path, rec); + if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG) + ret = repair_inode_nbytes(trans, root, path, rec); btrfs_commit_transaction(trans, root); btrfs_free_path(path); return ret; @@ -2322,7 +3048,7 @@ static int check_inode_recs(struct btrfs_root *root, /* * We need to record the highest inode number for later 'lost+found' * dir creation. - * We must select a ino not used/refered by any existing inode, or + * We must select an ino not used/referred by any existing inode, or * 'lost+found' ino may be a missing ino in a corrupted leaf, * this may cause 'lost+found' dir has wrong nlinks. */ @@ -2386,6 +3112,7 @@ static int check_inode_recs(struct btrfs_root *root, return err; rec = get_inode_rec(inode_cache, root_dirid, 0); + BUG_ON(IS_ERR(rec)); if (rec) { ret = check_root_dir(rec); if (ret) { @@ -2493,13 +3220,16 @@ static struct root_record *get_root_rec(struct cache_tree *root_cache, rec = container_of(cache, struct root_record, cache); } else { rec = calloc(1, sizeof(*rec)); + if (!rec) + return ERR_PTR(-ENOMEM); rec->objectid = objectid; INIT_LIST_HEAD(&rec->backrefs); rec->cache.start = objectid; rec->cache.size = 1; ret = insert_cache_extent(root_cache, &rec->cache); - BUG_ON(ret); + if (ret) + return ERR_PTR(-EEXIST); } return rec; } @@ -2519,8 +3249,9 @@ static struct root_backref *get_root_backref(struct root_record *rec, return backref; } - backref = malloc(sizeof(*backref) + namelen + 1); - memset(backref, 0, sizeof(*backref)); + backref = calloc(1, sizeof(*backref) + namelen + 1); + if (!backref) + return NULL; backref->ref_root = ref_root; backref->dir = dir; backref->index = index; @@ -2538,8 +3269,7 @@ static void free_root_record(struct cache_extent *cache) rec = container_of(cache, struct root_record, cache); while (!list_empty(&rec->backrefs)) { - backref = list_entry(rec->backrefs.next, - struct root_backref, list); + backref = to_root_backref(rec->backrefs.next); list_del(&backref->list); free(backref); } @@ -2558,7 +3288,9 @@ static int add_root_backref(struct cache_tree *root_cache, struct root_backref *backref; rec = get_root_rec(root_cache, root_id); + BUG_ON(IS_ERR(rec)); backref = get_root_backref(rec, ref_root, dir, index, name, namelen); + BUG_ON(!backref); backref->errors |= errors; @@ -2663,6 +3395,7 @@ static int check_root_refs(struct btrfs_root *root, int errors = 0; rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID); + BUG_ON(IS_ERR(rec)); rec->found_ref = 1; /* fixme: this can not detect circular references */ @@ -2684,6 +3417,7 @@ static int check_root_refs(struct btrfs_root *root, ref_root = get_root_rec(root_cache, backref->ref_root); + BUG_ON(IS_ERR(ref_root)); if (ref_root->found_ref > 0) continue; @@ -2915,7 +3649,10 @@ static int check_fs_root(struct btrfs_root *root, struct root_record *rec; struct btrfs_root_item *root_item = &root->root_item; struct cache_tree corrupt_blocks; + struct orphan_data_extent *orphan; + struct orphan_data_extent *tmp; enum btrfs_tree_block_status status; + struct node_refs nrefs; /* * Reuse the corrupt_block cache tree to record corrupted tree block @@ -2925,8 +3662,10 @@ static int check_fs_root(struct btrfs_root *root, */ cache_tree_init(&corrupt_blocks); root->fs_info->corrupt_blocks = &corrupt_blocks; + if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { rec = get_root_rec(root_cache, root->root_key.objectid); + BUG_ON(IS_ERR(rec)); if (btrfs_root_refs(root_item) > 0) rec->found_root_item = 1; } @@ -2935,6 +3674,19 @@ static int check_fs_root(struct btrfs_root *root, memset(&root_node, 0, sizeof(root_node)); cache_tree_init(&root_node.root_cache); cache_tree_init(&root_node.inode_cache); + memset(&nrefs, 0, sizeof(nrefs)); + + /* Move the orphan extent record to corresponding inode_record */ + list_for_each_entry_safe(orphan, tmp, + &root->orphan_data_extents, list) { + struct inode_record *inode; + + inode = get_inode_rec(&root_node.inode_cache, orphan->objectid, + 1); + BUG_ON(IS_ERR(inode)); + inode->errors |= I_ERR_FILE_EXTENT_ORPHAN; + list_move(&orphan->list, &inode->orphan_extents); + } level = btrfs_header_level(root->node); memset(wc->nodes, 0, sizeof(wc->nodes)); @@ -2972,7 +3724,7 @@ static int check_fs_root(struct btrfs_root *root, } while (1) { - wret = walk_down_tree(root, &path, wc, &level); + wret = walk_down_tree(root, &path, wc, &level, &nrefs); if (wret < 0) ret = wret; if (wret != 0) @@ -3033,6 +3785,7 @@ skip_walking: free_corrupt_blocks_tree(&corrupt_blocks); root->fs_info->corrupt_blocks = NULL; + free_orphan_data_extents(&root->orphan_data_extents); return ret; } @@ -3056,6 +3809,11 @@ static int check_fs_roots(struct btrfs_root *root, int ret; int err = 0; + if (ctx.progress_enabled) { + ctx.tp = TASK_FS_ROOTS; + task_start(ctx.info); + } + /* * Just in case we made any changes to the extent tree that weren't * reflected into the free space cache yet. @@ -3132,27 +3890,28 @@ out: if (!cache_tree_empty(&wc.shared)) fprintf(stderr, "warning line %d\n", __LINE__); + task_stop(ctx.info); + return err; } static int all_backpointers_checked(struct extent_record *rec, int print_errs) { - struct list_head *cur = rec->backrefs.next; + struct rb_node *n; struct extent_backref *back; struct tree_backref *tback; struct data_backref *dback; u64 found = 0; int err = 0; - while(cur != &rec->backrefs) { - back = list_entry(cur, struct extent_backref, list); - cur = cur->next; + for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) { + back = rb_node_to_extent_backref(n); if (!back->found_extent_tree) { err = 1; if (!print_errs) goto out; if (back->is_data) { - dback = (struct data_backref *)back; + dback = to_data_backref(back); fprintf(stderr, "Backref %llu %s %llu" " owner %llu offset %llu num_refs %lu" " not found in extent tree\n", @@ -3166,7 +3925,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs) (unsigned long long)dback->offset, (unsigned long)dback->num_refs); } else { - tback = (struct tree_backref *)back; + tback = to_tree_backref(back); fprintf(stderr, "Backref %llu parent %llu" " root %llu not found in extent tree\n", (unsigned long long)rec->start, @@ -3178,7 +3937,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs) err = 1; if (!print_errs) goto out; - tback = (struct tree_backref *)back; + tback = to_tree_backref(back); fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n", (unsigned long long)rec->start, back->full_backref ? "parent" : "root", @@ -3187,7 +3946,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs) (unsigned long long)tback->root, back); } if (back->is_data) { - dback = (struct data_backref *)back; + dback = to_data_backref(back); if (dback->found_ref != dback->num_refs) { err = 1; if (!print_errs) @@ -3231,7 +3990,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs) if (!back->is_data) { found += 1; } else { - dback = (struct data_backref *)back; + dback = to_data_backref(back); found += dback->found_ref; } } @@ -3249,17 +4008,16 @@ out: return err; } -static int free_all_extent_backrefs(struct extent_record *rec) +static void __free_one_backref(struct rb_node *node) { - struct extent_backref *back; - struct list_head *cur; - while (!list_empty(&rec->backrefs)) { - cur = rec->backrefs.next; - back = list_entry(cur, struct extent_backref, list); - list_del(cur); - free(back); - } - return 0; + struct extent_backref *back = rb_node_to_extent_backref(node); + + free(back); +} + +static void free_all_extent_backrefs(struct extent_record *rec) +{ + rb_free_nodes(&rec->backref_tree, __free_one_backref); } static void free_extent_record_cache(struct btrfs_fs_info *fs_info, @@ -3273,7 +4031,6 @@ static void free_extent_record_cache(struct btrfs_fs_info *fs_info, if (!cache) break; rec = container_of(cache, struct extent_record, cache); - btrfs_unpin_extent(fs_info, rec->start, rec->max_size); remove_cache_extent(extent_cache, cache); free_all_extent_backrefs(rec); free(rec); @@ -3285,7 +4042,9 @@ static int maybe_free_extent_rec(struct cache_tree *extent_cache, { if (rec->content_checked && rec->owner_ref_checked && rec->extent_item_refs == rec->refs && rec->refs > 0 && - rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0)) { + rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) && + !rec->bad_full_backref && !rec->crossing_stripes && + !rec->wrong_chunk_type) { remove_cache_extent(extent_cache, &rec->cache); free_all_extent_backrefs(rec); list_del_init(&rec->list); @@ -3298,7 +4057,7 @@ static int check_owner_ref(struct btrfs_root *root, struct extent_record *rec, struct extent_buffer *buf) { - struct extent_backref *node; + struct extent_backref *node, *tmp; struct tree_backref *back; struct btrfs_root *ref_root; struct btrfs_key key; @@ -3308,14 +4067,15 @@ static int check_owner_ref(struct btrfs_root *root, int found = 0; int ret; - list_for_each_entry(node, &rec->backrefs, list) { + rbtree_postorder_for_each_entry_safe(node, tmp, + &rec->backref_tree, node) { if (node->is_data) continue; if (!node->found_ref) continue; if (node->full_backref) continue; - back = (struct tree_backref *)node; + back = to_tree_backref(node); if (btrfs_header_owner(buf) == back->root) return 0; } @@ -3353,18 +4113,16 @@ static int check_owner_ref(struct btrfs_root *root, static int is_extent_tree_record(struct extent_record *rec) { - struct list_head *cur = rec->backrefs.next; - struct extent_backref *node; + struct extent_backref *ref, *tmp; struct tree_backref *back; int is_extent = 0; - while(cur != &rec->backrefs) { - node = list_entry(cur, struct extent_backref, list); - cur = cur->next; - if (node->is_data) + rbtree_postorder_for_each_entry_safe(ref, tmp, + &rec->backref_tree, node) { + if (ref->is_data) return 0; - back = (struct tree_backref *)node; - if (node->full_backref) + back = to_tree_backref(ref); + if (ref->full_backref) return 0; if (back->root == BTRFS_EXTENT_TREE_OBJECTID) is_extent = 1; @@ -3600,11 +4358,11 @@ again: * Attempt to fix basic block failures. If we can't fix it for whatever reason * then just return -EIO. */ -static int try_to_fix_bad_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int try_to_fix_bad_block(struct btrfs_root *root, struct extent_buffer *buf, enum btrfs_tree_block_status status) { + struct btrfs_trans_handle *trans; struct ulist *roots; struct ulist_node *node; struct btrfs_root *search_root; @@ -3621,7 +4379,7 @@ static int try_to_fix_bad_block(struct btrfs_trans_handle *trans, if (!path) return -EIO; - ret = btrfs_find_all_roots(trans, root->fs_info, buf->start, + ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots); if (ret) { btrfs_free_path(path); @@ -3640,7 +4398,12 @@ static int try_to_fix_bad_block(struct btrfs_trans_handle *trans, break; } - record_root_in_trans(trans, search_root); + + trans = btrfs_start_transaction(search_root, 0); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + break; + } path->lowest_level = btrfs_header_level(buf); path->skip_check_block = 1; @@ -3651,23 +4414,26 @@ static int try_to_fix_bad_block(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1); if (ret) { ret = -EIO; + btrfs_commit_transaction(trans, search_root); break; } if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER) ret = fix_key_order(trans, search_root, path); else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS) ret = fix_item_offset(trans, search_root, path); - if (ret) + if (ret) { + btrfs_commit_transaction(trans, search_root); break; + } btrfs_release_path(path); + btrfs_commit_transaction(trans, search_root); } ulist_free(roots); btrfs_free_path(path); return ret; } -static int check_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int check_block(struct btrfs_root *root, struct cache_tree *extent_cache, struct extent_buffer *buf, u64 flags) { @@ -3703,8 +4469,7 @@ static int check_block(struct btrfs_trans_handle *trans, if (status != BTRFS_TREE_BLOCK_CLEAN) { if (repair) - status = try_to_fix_bad_block(trans, root, buf, - status); + status = try_to_fix_bad_block(root, buf, status); if (status != BTRFS_TREE_BLOCK_CLEAN) { ret = -EIO; fprintf(stderr, "bad block %llu\n", @@ -3712,7 +4477,7 @@ static int check_block(struct btrfs_trans_handle *trans, } else { /* * Signal to callers we need to start the scan over - * again since we'll have cow'ed blocks. + * again since we'll have cowed blocks. */ ret = -EAGAIN; } @@ -3731,38 +4496,40 @@ static int check_block(struct btrfs_trans_handle *trans, return ret; } + static struct tree_backref *find_tree_backref(struct extent_record *rec, u64 parent, u64 root) { - struct list_head *cur = rec->backrefs.next; - struct extent_backref *node; - struct tree_backref *back; + struct rb_node *node; + struct tree_backref *back = NULL; + struct tree_backref match = { + .node = { + .is_data = 0, + }, + }; - while(cur != &rec->backrefs) { - node = list_entry(cur, struct extent_backref, list); - cur = cur->next; - if (node->is_data) - continue; - back = (struct tree_backref *)node; - if (parent > 0) { - if (!node->full_backref) - continue; - if (parent == back->parent) - return back; - } else { - if (node->full_backref) - continue; - if (back->root == root) - return back; - } + if (parent) { + match.parent = parent; + match.node.full_backref = 1; + } else { + match.root = root; } - return NULL; + + node = rb_search(&rec->backref_tree, &match.node.node, + (rb_compare_keys)compare_extent_backref, NULL); + if (node) + back = to_tree_backref(rb_node_to_extent_backref(node)); + + return back; } static struct tree_backref *alloc_tree_backref(struct extent_record *rec, u64 parent, u64 root) { struct tree_backref *ref = malloc(sizeof(*ref)); + + if (!ref) + return NULL; memset(&ref->node, 0, sizeof(ref->node)); if (parent > 0) { ref->parent = parent; @@ -3771,7 +4538,7 @@ static struct tree_backref *alloc_tree_backref(struct extent_record *rec, ref->root = root; ref->node.full_backref = 0; } - list_add_tail(&ref->node.list, &rec->backrefs); + rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref); return ref; } @@ -3782,35 +4549,32 @@ static struct data_backref *find_data_backref(struct extent_record *rec, int found_ref, u64 disk_bytenr, u64 bytes) { - struct list_head *cur = rec->backrefs.next; - struct extent_backref *node; - struct data_backref *back; + struct rb_node *node; + struct data_backref *back = NULL; + struct data_backref match = { + .node = { + .is_data = 1, + }, + .owner = owner, + .offset = offset, + .bytes = bytes, + .found_ref = found_ref, + .disk_bytenr = disk_bytenr, + }; - while(cur != &rec->backrefs) { - node = list_entry(cur, struct extent_backref, list); - cur = cur->next; - if (!node->is_data) - continue; - back = (struct data_backref *)node; - if (parent > 0) { - if (!node->full_backref) - continue; - if (parent == back->parent) - return back; - } else { - if (node->full_backref) - continue; - if (back->root == root && back->owner == owner && - back->offset == offset) { - if (found_ref && node->found_ref && - (back->bytes != bytes || - back->disk_bytenr != disk_bytenr)) - continue; - return back; - } - } + if (parent) { + match.parent = parent; + match.node.full_backref = 1; + } else { + match.root = root; } - return NULL; + + node = rb_search(&rec->backref_tree, &match.node.node, + (rb_compare_keys)compare_extent_backref, NULL); + if (node) + back = to_data_backref(rb_node_to_extent_backref(node)); + + return back; } static struct data_backref *alloc_data_backref(struct extent_record *rec, @@ -3819,6 +4583,9 @@ static struct data_backref *alloc_data_backref(struct extent_record *rec, u64 max_size) { struct data_backref *ref = malloc(sizeof(*ref)); + + if (!ref) + return NULL; memset(&ref->node, 0, sizeof(ref->node)); ref->node.is_data = 1; @@ -3836,38 +4603,142 @@ static struct data_backref *alloc_data_backref(struct extent_record *rec, ref->bytes = max_size; ref->found_ref = 0; ref->num_refs = 0; - list_add_tail(&ref->node.list, &rec->backrefs); + rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref); if (max_size > rec->max_size) rec->max_size = max_size; return ref; } +/* Check if the type of extent matches with its chunk */ +static void check_extent_type(struct extent_record *rec) +{ + struct btrfs_block_group_cache *bg_cache; + + bg_cache = btrfs_lookup_first_block_group(global_info, rec->start); + if (!bg_cache) + return; + + /* data extent, check chunk directly*/ + if (!rec->metadata) { + if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA)) + rec->wrong_chunk_type = 1; + return; + } + + /* metadata extent, check the obvious case first */ + if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM | + BTRFS_BLOCK_GROUP_METADATA))) { + rec->wrong_chunk_type = 1; + return; + } + + /* + * Check SYSTEM extent, as it's also marked as metadata, we can only + * make sure it's a SYSTEM extent by its backref + */ + if (!RB_EMPTY_ROOT(&rec->backref_tree)) { + struct extent_backref *node; + struct tree_backref *tback; + u64 bg_type; + + node = rb_node_to_extent_backref(rb_first(&rec->backref_tree)); + if (node->is_data) { + /* tree block shouldn't have data backref */ + rec->wrong_chunk_type = 1; + return; + } + tback = container_of(node, struct tree_backref, node); + + if (tback->root == BTRFS_CHUNK_TREE_OBJECTID) + bg_type = BTRFS_BLOCK_GROUP_SYSTEM; + else + bg_type = BTRFS_BLOCK_GROUP_METADATA; + if (!(bg_cache->flags & bg_type)) + rec->wrong_chunk_type = 1; + } +} + +/* + * Allocate a new extent record, fill default values from @tmpl and insert int + * @extent_cache. Caller is supposed to make sure the [start,nr) is not in + * the cache, otherwise it fails. + */ +static int add_extent_rec_nolookup(struct cache_tree *extent_cache, + struct extent_record *tmpl) +{ + struct extent_record *rec; + int ret = 0; + + rec = malloc(sizeof(*rec)); + if (!rec) + return -ENOMEM; + rec->start = tmpl->start; + rec->max_size = tmpl->max_size; + rec->nr = max(tmpl->nr, tmpl->max_size); + rec->found_rec = tmpl->found_rec; + rec->content_checked = tmpl->content_checked; + rec->owner_ref_checked = tmpl->owner_ref_checked; + rec->num_duplicates = 0; + rec->metadata = tmpl->metadata; + rec->flag_block_full_backref = FLAG_UNSET; + rec->bad_full_backref = 0; + rec->crossing_stripes = 0; + rec->wrong_chunk_type = 0; + rec->is_root = tmpl->is_root; + rec->refs = tmpl->refs; + rec->extent_item_refs = tmpl->extent_item_refs; + rec->parent_generation = tmpl->parent_generation; + INIT_LIST_HEAD(&rec->backrefs); + INIT_LIST_HEAD(&rec->dups); + INIT_LIST_HEAD(&rec->list); + rec->backref_tree = RB_ROOT; + memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key)); + rec->cache.start = tmpl->start; + rec->cache.size = tmpl->nr; + ret = insert_cache_extent(extent_cache, &rec->cache); + BUG_ON(ret); + bytes_used += rec->nr; + + if (tmpl->metadata) + rec->crossing_stripes = check_crossing_stripes(rec->start, + global_info->tree_root->nodesize); + check_extent_type(rec); + return ret; +} + +/* + * Lookup and modify an extent, some values of @tmpl are interpreted verbatim, + * some are hints: + * - refs - if found, increase refs + * - is_root - if found, set + * - content_checked - if found, set + * - owner_ref_checked - if found, set + * + * If not found, create a new one, initialize and insert. + */ static int add_extent_rec(struct cache_tree *extent_cache, - struct btrfs_key *parent_key, u64 parent_gen, - u64 start, u64 nr, u64 extent_item_refs, - int is_root, int inc_ref, int set_checked, - int metadata, int extent_rec, u64 max_size) + struct extent_record *tmpl) { struct extent_record *rec; struct cache_extent *cache; int ret = 0; int dup = 0; - cache = lookup_cache_extent(extent_cache, start, nr); + cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr); if (cache) { rec = container_of(cache, struct extent_record, cache); - if (inc_ref) + if (tmpl->refs) rec->refs++; if (rec->nr == 1) - rec->nr = max(nr, max_size); + rec->nr = max(tmpl->nr, tmpl->max_size); /* * We need to make sure to reset nr to whatever the extent * record says was the real size, this way we can compare it to * the backrefs. */ - if (extent_rec) { - if (start != rec->start || rec->found_rec) { + if (tmpl->found_rec) { + if (tmpl->start != rec->start || rec->found_rec) { struct extent_record *tmp; dup = 1; @@ -3884,97 +4755,61 @@ static int add_extent_rec(struct cache_tree *extent_cache, tmp = malloc(sizeof(*tmp)); if (!tmp) return -ENOMEM; - tmp->start = start; - tmp->max_size = max_size; - tmp->nr = nr; + tmp->start = tmpl->start; + tmp->max_size = tmpl->max_size; + tmp->nr = tmpl->nr; tmp->found_rec = 1; - tmp->metadata = metadata; - tmp->extent_item_refs = extent_item_refs; + tmp->metadata = tmpl->metadata; + tmp->extent_item_refs = tmpl->extent_item_refs; INIT_LIST_HEAD(&tmp->list); list_add_tail(&tmp->list, &rec->dups); rec->num_duplicates++; } else { - rec->nr = nr; + rec->nr = tmpl->nr; rec->found_rec = 1; } } - if (extent_item_refs && !dup) { + if (tmpl->extent_item_refs && !dup) { if (rec->extent_item_refs) { fprintf(stderr, "block %llu rec " "extent_item_refs %llu, passed %llu\n", - (unsigned long long)start, + (unsigned long long)tmpl->start, (unsigned long long) rec->extent_item_refs, - (unsigned long long)extent_item_refs); + (unsigned long long)tmpl->extent_item_refs); } - rec->extent_item_refs = extent_item_refs; + rec->extent_item_refs = tmpl->extent_item_refs; } - if (is_root) + if (tmpl->is_root) rec->is_root = 1; - if (set_checked) { + if (tmpl->content_checked) rec->content_checked = 1; + if (tmpl->owner_ref_checked) rec->owner_ref_checked = 1; - } - - if (parent_key) - btrfs_cpu_key_to_disk(&rec->parent_key, parent_key); - if (parent_gen) - rec->parent_generation = parent_gen; - - if (rec->max_size < max_size) - rec->max_size = max_size; + memcpy(&rec->parent_key, &tmpl->parent_key, + sizeof(tmpl->parent_key)); + if (tmpl->parent_generation) + rec->parent_generation = tmpl->parent_generation; + if (rec->max_size < tmpl->max_size) + rec->max_size = tmpl->max_size; + /* + * A metadata extent can't cross stripe_len boundary, otherwise + * kernel scrub won't be able to handle it. + * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check + * it. + */ + if (tmpl->metadata) + rec->crossing_stripes = check_crossing_stripes( + rec->start, global_info->tree_root->nodesize); + check_extent_type(rec); maybe_free_extent_rec(extent_cache, rec); return ret; } - rec = malloc(sizeof(*rec)); - rec->start = start; - rec->max_size = max_size; - rec->nr = max(nr, max_size); - rec->found_rec = !!extent_rec; - rec->content_checked = 0; - rec->owner_ref_checked = 0; - rec->num_duplicates = 0; - rec->metadata = metadata; - INIT_LIST_HEAD(&rec->backrefs); - INIT_LIST_HEAD(&rec->dups); - INIT_LIST_HEAD(&rec->list); - - if (is_root) - rec->is_root = 1; - else - rec->is_root = 0; - - if (inc_ref) - rec->refs = 1; - else - rec->refs = 0; - - if (extent_item_refs) - rec->extent_item_refs = extent_item_refs; - else - rec->extent_item_refs = 0; - - if (parent_key) - btrfs_cpu_key_to_disk(&rec->parent_key, parent_key); - else - memset(&rec->parent_key, 0, sizeof(*parent_key)); - if (parent_gen) - rec->parent_generation = parent_gen; - else - rec->parent_generation = 0; + ret = add_extent_rec_nolookup(extent_cache, tmpl); - rec->cache.start = start; - rec->cache.size = nr; - ret = insert_cache_extent(extent_cache, &rec->cache); - BUG_ON(ret); - bytes_used += nr; - if (set_checked) { - rec->content_checked = 1; - rec->owner_ref_checked = 1; - } return ret; } @@ -3987,8 +4822,15 @@ static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr, cache = lookup_cache_extent(extent_cache, bytenr, 1); if (!cache) { - add_extent_rec(extent_cache, NULL, 0, bytenr, - 1, 0, 0, 0, 0, 1, 0, 0); + struct extent_record tmpl; + + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.start = bytenr; + tmpl.nr = 1; + tmpl.metadata = 1; + + add_extent_rec_nolookup(extent_cache, &tmpl); + cache = lookup_cache_extent(extent_cache, bytenr, 1); if (!cache) abort(); @@ -4000,8 +4842,10 @@ static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr, } back = find_tree_backref(rec, parent, root); - if (!back) + if (!back) { back = alloc_tree_backref(rec, parent, root); + BUG_ON(!back); + } if (found_ref) { if (back->node.found_ref) { @@ -4022,6 +4866,7 @@ static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr, } back->node.found_extent_tree = 1; } + check_extent_type(rec); maybe_free_extent_rec(extent_cache, rec); return 0; } @@ -4036,8 +4881,15 @@ static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr, cache = lookup_cache_extent(extent_cache, bytenr, 1); if (!cache) { - add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0, - 0, 0, max_size); + struct extent_record tmpl; + + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.start = bytenr; + tmpl.nr = 1; + tmpl.max_size = max_size; + + add_extent_rec_nolookup(extent_cache, &tmpl); + cache = lookup_cache_extent(extent_cache, bytenr, 1); if (!cache) abort(); @@ -4059,9 +4911,11 @@ static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr, */ back = find_data_backref(rec, parent, root, owner, offset, found_ref, bytenr, max_size); - if (!back) + if (!back) { back = alloc_data_backref(rec, parent, root, owner, offset, max_size); + BUG_ON(!back); + } if (found_ref) { BUG_ON(num_refs != 1); @@ -4291,14 +5145,12 @@ struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf, ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); num_stripes = btrfs_chunk_num_stripes(leaf, ptr); - rec = malloc(btrfs_chunk_record_size(num_stripes)); + rec = calloc(1, btrfs_chunk_record_size(num_stripes)); if (!rec) { fprintf(stderr, "memory allocation failed\n"); exit(-1); } - memset(rec, 0, btrfs_chunk_record_size(num_stripes)); - INIT_LIST_HEAD(&rec->list); INIT_LIST_HEAD(&rec->dextents); rec->bg_rec = NULL; @@ -4396,12 +5248,11 @@ btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key, struct btrfs_block_group_item *ptr; struct block_group_record *rec; - rec = malloc(sizeof(*rec)); + rec = calloc(1, sizeof(*rec)); if (!rec) { fprintf(stderr, "memory allocation failed\n"); exit(-1); } - memset(rec, 0, sizeof(*rec)); rec->cache.start = key->objectid; rec->cache.size = key->offset; @@ -4445,12 +5296,11 @@ btrfs_new_device_extent_record(struct extent_buffer *leaf, struct device_extent_record *rec; struct btrfs_dev_extent *ptr; - rec = malloc(sizeof(*rec)); + rec = calloc(1, sizeof(*rec)); if (!rec) { fprintf(stderr, "memory allocation failed\n"); exit(-1); } - memset(rec, 0, sizeof(*rec)); rec->cache.objectid = key->objectid; rec->cache.start = key->offset; @@ -4504,6 +5354,7 @@ static int process_extent_item(struct btrfs_root *root, struct btrfs_extent_data_ref *dref; struct btrfs_shared_data_ref *sref; struct btrfs_key key; + struct extent_record tmpl; unsigned long end; unsigned long ptr; int type; @@ -4517,7 +5368,7 @@ static int process_extent_item(struct btrfs_root *root, if (key.type == BTRFS_METADATA_ITEM_KEY) { metadata = 1; - num_bytes = root->leafsize; + num_bytes = root->nodesize; } else { num_bytes = key.offset; } @@ -4531,16 +5382,32 @@ static int process_extent_item(struct btrfs_root *root, #else BUG(); #endif - return add_extent_rec(extent_cache, NULL, 0, key.objectid, - num_bytes, refs, 0, 0, 0, metadata, 1, - num_bytes); + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.start = key.objectid; + tmpl.nr = num_bytes; + tmpl.extent_item_refs = refs; + tmpl.metadata = metadata; + tmpl.found_rec = 1; + tmpl.max_size = num_bytes; + + return add_extent_rec(extent_cache, &tmpl); } ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item); refs = btrfs_extent_refs(eb, ei); + if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK) + metadata = 1; + else + metadata = 0; - add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes, - refs, 0, 0, 0, metadata, 1, num_bytes); + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.start = key.objectid; + tmpl.nr = num_bytes; + tmpl.extent_item_refs = refs; + tmpl.metadata = metadata; + tmpl.found_rec = 1; + tmpl.max_size = num_bytes; + add_extent_rec(extent_cache, &tmpl); ptr = (unsigned long)(ei + 1); if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK && @@ -4733,7 +5600,7 @@ static int verify_space_cache(struct btrfs_root *root, if (key.type == BTRFS_EXTENT_ITEM_KEY) last = key.objectid + key.offset; else - last = key.objectid + root->leafsize; + last = key.objectid + root->nodesize; path->slots[0]++; continue; } @@ -4745,7 +5612,7 @@ static int verify_space_cache(struct btrfs_root *root, if (key.type == BTRFS_EXTENT_ITEM_KEY) last = key.objectid + key.offset; else - last = key.objectid + root->leafsize; + last = key.objectid + root->nodesize; path->slots[0]++; } @@ -4782,6 +5649,11 @@ static int check_space_cache(struct btrfs_root *root) return 0; } + if (ctx.progress_enabled) { + ctx.tp = TASK_FREE_SPACE; + task_start(ctx.info); + } + while (1) { cache = btrfs_lookup_first_block_group(root->fs_info, start); if (!cache) @@ -4798,53 +5670,41 @@ static int check_space_cache(struct btrfs_root *root) btrfs_remove_free_space_cache(cache); } - ret = load_free_space_cache(root->fs_info, cache); - if (!ret) - continue; + if (btrfs_fs_compat_ro(root->fs_info, + BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) { + ret = exclude_super_stripes(root, cache); + if (ret) { + fprintf(stderr, "could not exclude super stripes: %s\n", + strerror(-ret)); + error++; + continue; + } + ret = load_free_space_tree(root->fs_info, cache); + free_excluded_extents(root, cache); + if (ret < 0) { + fprintf(stderr, "could not load free space tree: %s\n", + strerror(-ret)); + error++; + continue; + } + error += ret; + } else { + ret = load_free_space_cache(root->fs_info, cache); + if (!ret) + continue; + } ret = verify_space_cache(root, cache); if (ret) { - fprintf(stderr, "cache appears valid but isnt %Lu\n", + fprintf(stderr, "cache appears valid but isn't %Lu\n", cache->key.objectid); error++; } } - return error ? -EINVAL : 0; -} - -static int read_extent_data(struct btrfs_root *root, char *data, - u64 logical, u64 *len, int mirror) -{ - u64 offset = 0; - struct btrfs_multi_bio *multi = NULL; - struct btrfs_fs_info *info = root->fs_info; - struct btrfs_device *device; - int ret = 0; - u64 max_len = *len; - - ret = btrfs_map_block(&info->mapping_tree, READ, logical, len, - &multi, mirror, NULL); - if (ret) { - fprintf(stderr, "Couldn't map the block %llu\n", - logical + offset); - goto err; - } - device = multi->stripes[0].dev; - - if (device->fd == 0) - goto err; - if (*len > max_len) - *len = max_len; + task_stop(ctx.info); - ret = pread64(device->fd, data, *len, multi->stripes[0].physical); - if (ret != *len) - ret = -EIO; - else - ret = 0; -err: - kfree(multi); - return ret; + return error ? -EINVAL : 0; } static int check_extent_csums(struct btrfs_root *root, u64 bytenr, @@ -4926,7 +5786,7 @@ static int check_extent_exists(struct btrfs_root *root, u64 bytenr, path = btrfs_alloc_path(); if (!path) { - fprintf(stderr, "Error allocing path\n"); + fprintf(stderr, "Error allocating path\n"); return -ENOMEM; } @@ -4959,7 +5819,7 @@ again: /* * Block group items come before extent items if they have the same - * bytenr, so walk back one more just in case. Dear future traveler, + * bytenr, so walk back one more just in case. Dear future traveller, * first congrats on mastering time travel. Now if it's not too much * trouble could you go back to 2006 and tell Chris to make the * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the @@ -5162,27 +6022,41 @@ static int is_dropped_key(struct btrfs_key *key, return 0; } -static int calc_extent_flag(struct btrfs_root *root, - struct cache_tree *extent_cache, +/* + * Here are the rules for FULL_BACKREF. + * + * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set. + * 2) If btrfs_header_owner(buf) no longer points to buf then we have + * FULL_BACKREF set. + * 3) We cowed the block walking down a reloc tree. This is impossible to tell + * if it happened after the relocation occurred since we'll have dropped the + * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and + * have no real way to know for sure. + * + * We process the blocks one root at a time, and we start from the lowest root + * objectid and go to the highest. So we can just lookup the owner backref for + * the record and if we don't find it then we know it doesn't exist and we have + * a FULL BACKREF. + * + * FIXME: if we ever start reclaiming root objectid's then we need to fix this + * assumption and simply indicate that we _think_ that the FULL BACKREF needs to + * be set or not and then we can check later once we've gathered all the refs. + */ +static int calc_extent_flag(struct btrfs_root *root, + struct cache_tree *extent_cache, struct extent_buffer *buf, struct root_item_record *ri, u64 *flags) { - int i; - int nritems = btrfs_header_nritems(buf); - struct btrfs_key key; struct extent_record *rec; struct cache_extent *cache; - struct data_backref *dback; struct tree_backref *tback; - struct extent_buffer *new_buf; u64 owner = 0; - u64 bytenr; - u64 offset; - u64 ptr; - int size; - int ret; - u8 level; + + cache = lookup_cache_extent(extent_cache, buf->start, 1); + /* we have added this extent before */ + BUG_ON(!cache); + rec = container_of(cache, struct extent_record, cache); /* * Except file/reloc tree, we can not have @@ -5195,96 +6069,32 @@ static int calc_extent_flag(struct btrfs_root *root, */ if (buf->start == ri->bytenr) goto normal; - if (btrfs_is_leaf(buf)) { - /* - * we are searching from original root, world - * peace is achieved, we use normal backref. - */ - owner = btrfs_header_owner(buf); - if (owner == ri->objectid) - goto normal; - /* - * we check every eb here, and if any of - * eb dosen't have original root refers - * to this eb, we set full backref flag for - * this extent, otherwise normal backref. - */ - for (i = 0; i < nritems; i++) { - struct btrfs_file_extent_item *fi; - btrfs_item_key_to_cpu(buf, &key, i); - if (key.type != BTRFS_EXTENT_DATA_KEY) - continue; - fi = btrfs_item_ptr(buf, i, - struct btrfs_file_extent_item); - if (btrfs_file_extent_type(buf, fi) == - BTRFS_FILE_EXTENT_INLINE) - continue; - if (btrfs_file_extent_disk_bytenr(buf, fi) == 0) - continue; - bytenr = btrfs_file_extent_disk_bytenr(buf, fi); - cache = lookup_cache_extent(extent_cache, bytenr, 1); - if (!cache) - goto full_backref; - offset = btrfs_file_extent_offset(buf, fi); - rec = container_of(cache, struct extent_record, cache); - dback = find_data_backref(rec, 0, ri->objectid, owner, - key.offset - offset, 1, bytenr, bytenr); - if (!dback) - goto full_backref; - } + if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) goto full_backref; - } else { - level = btrfs_header_level(buf); - for (i = 0; i < nritems; i++) { - ptr = btrfs_node_blockptr(buf, i); - size = btrfs_level_size(root, level); - if (i == 0) { - new_buf = read_tree_block(root, ptr, size, 0); - if (!extent_buffer_uptodate(new_buf)) { - free_extent_buffer(new_buf); - ret = -EIO; - return ret; - } - /* - * we are searching from origin root, world - * peace is achieved, we use normal backref. - */ - owner = btrfs_header_owner(new_buf); - free_extent_buffer(new_buf); - if (owner == ri->objectid) - goto normal; - } - cache = lookup_cache_extent(extent_cache, ptr, size); - if (!cache) - goto full_backref; - rec = container_of(cache, struct extent_record, cache); - tback = find_tree_backref(rec, 0, owner); - if (!tback) - goto full_backref; - } - } + owner = btrfs_header_owner(buf); + if (owner == ri->objectid) + goto normal; + + tback = find_tree_backref(rec, 0, owner); + if (!tback) + goto full_backref; normal: *flags = 0; - cache = lookup_cache_extent(extent_cache, buf->start, 1); - /* we have added this extent before */ - BUG_ON(!cache); - rec = container_of(cache, struct extent_record, cache); - rec->flag_block_full_backref = 0; + if (rec->flag_block_full_backref != FLAG_UNSET && + rec->flag_block_full_backref != 0) + rec->bad_full_backref = 1; return 0; full_backref: *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; - cache = lookup_cache_extent(extent_cache, buf->start, 1); - /* we have added this extent before */ - BUG_ON(!cache); - rec = container_of(cache, struct extent_record, cache); - rec->flag_block_full_backref = 1; + if (rec->flag_block_full_backref != FLAG_UNSET && + rec->flag_block_full_backref != 1) + rec->bad_full_backref = 1; return 0; } -static int run_next_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int run_next_block(struct btrfs_root *root, struct block_info *bits, int bits_nr, u64 *last, @@ -5300,6 +6110,7 @@ static int run_next_block(struct btrfs_trans_handle *trans, struct root_item_record *ri) { struct extent_buffer *buf; + struct extent_record *rec = NULL; u64 bytenr; u32 size; u64 parent; @@ -5352,8 +6163,6 @@ static int run_next_block(struct btrfs_trans_handle *trans, } cache = lookup_cache_extent(extent_cache, bytenr, size); if (cache) { - struct extent_record *rec; - rec = container_of(cache, struct extent_record, cache); gen = rec->parent_generation; } @@ -5368,32 +6177,64 @@ static int run_next_block(struct btrfs_trans_handle *trans, nritems = btrfs_header_nritems(buf); - /* - * FIXME, this only works only if we don't have any full - * backref mode. - */ + flags = 0; if (!init_extent_tree) { ret = btrfs_lookup_extent_info(NULL, root, bytenr, btrfs_header_level(buf), 1, NULL, &flags); - if (ret < 0) - goto out; + if (ret < 0) { + ret = calc_extent_flag(root, extent_cache, buf, ri, &flags); + if (ret < 0) { + fprintf(stderr, "Couldn't calc extent flags\n"); + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + } + } } else { flags = 0; ret = calc_extent_flag(root, extent_cache, buf, ri, &flags); - if (ret < 0) - goto out; + if (ret < 0) { + fprintf(stderr, "Couldn't calc extent flags\n"); + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + } } if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { + if (ri != NULL && + ri->objectid != BTRFS_TREE_RELOC_OBJECTID && + ri->objectid == btrfs_header_owner(buf)) { + /* + * Ok we got to this block from it's original owner and + * we have FULL_BACKREF set. Relocation can leave + * converted blocks over so this is altogether possible, + * however it's not possible if the generation > the + * last snapshot, so check for this case. + */ + if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) && + btrfs_header_generation(buf) > ri->last_snapshot) { + flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF; + rec->bad_full_backref = 1; + } + } + } else { + if (ri != NULL && + (ri->objectid == BTRFS_TREE_RELOC_OBJECTID || + btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) { + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + rec->bad_full_backref = 1; + } + } + + if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { + rec->flag_block_full_backref = 1; parent = bytenr; owner = 0; } else { + rec->flag_block_full_backref = 0; parent = 0; owner = btrfs_header_owner(buf); } - ret = check_block(trans, root, extent_cache, buf, flags); + ret = check_block(root, extent_cache, buf, flags); if (ret) goto out; @@ -5529,8 +6370,10 @@ static int run_next_block(struct btrfs_trans_handle *trans, btrfs_item_key_to_cpu(buf, &first_key, 0); level = btrfs_header_level(buf); for (i = 0; i < nritems; i++) { + struct extent_record tmpl; + ptr = btrfs_node_blockptr(buf, i); - size = btrfs_level_size(root, level - 1); + size = root->nodesize; btrfs_node_key_to_cpu(buf, &key, i); if (ri != NULL) { if ((level == ri->drop_level) @@ -5538,10 +6381,16 @@ static int run_next_block(struct btrfs_trans_handle *trans, continue; } } - ret = add_extent_rec(extent_cache, &key, - btrfs_node_ptr_generation(buf, i), - ptr, size, 0, 0, 1, 0, 1, 0, - size); + + memset(&tmpl, 0, sizeof(tmpl)); + btrfs_cpu_key_to_disk(&tmpl.parent_key, &key); + tmpl.parent_generation = btrfs_node_ptr_generation(buf, i); + tmpl.start = ptr; + tmpl.nr = size; + tmpl.refs = 1; + tmpl.metadata = 1; + tmpl.max_size = size; + ret = add_extent_rec(extent_cache, &tmpl); BUG_ON(ret); add_tree_backref(extent_cache, ptr, parent, owner, 1); @@ -5577,12 +6426,21 @@ static int add_root_to_pending(struct extent_buffer *buf, struct cache_tree *nodes, u64 objectid) { + struct extent_record tmpl; + if (btrfs_header_level(buf) > 0) add_pending(nodes, seen, buf->start, buf->len); else add_pending(pending, seen, buf->start, buf->len); - add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len, - 0, 1, 1, 0, 1, 0, buf->len); + + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.start = buf->start; + tmpl.nr = buf->len; + tmpl.is_root = 1; + tmpl.refs = 1; + tmpl.metadata = 1; + tmpl.max_size = buf->len; + add_extent_rec(extent_cache, &tmpl); if (objectid == BTRFS_TREE_RELOC_OBJECTID || btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) @@ -5636,7 +6494,7 @@ static int free_extent_hook(struct btrfs_trans_handle *trans, back->node.found_extent_tree = 0; if (!back->node.found_extent_tree && back->node.found_ref) { - list_del(&back->node.list); + rb_erase(&back->node.node, &rec->backref_tree); free(back); } } else { @@ -5655,7 +6513,7 @@ static int free_extent_hook(struct btrfs_trans_handle *trans, back->node.found_extent_tree = 0; } if (!back->node.found_extent_tree && back->node.found_ref) { - list_del(&back->node.list); + rb_erase(&back->node.node, &rec->backref_tree); free(back); } } @@ -5731,7 +6589,7 @@ static int delete_extent_records(struct btrfs_trans_handle *trans, if (found_key.type == BTRFS_EXTENT_ITEM_KEY || found_key.type == BTRFS_METADATA_ITEM_KEY) { u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ? - found_key.offset : root->leafsize; + found_key.offset : root->nodesize; ret = btrfs_update_block_group(trans, root, bytenr, bytes, 0, 0); @@ -5766,7 +6624,7 @@ static int record_extent(struct btrfs_trans_handle *trans, if (!back->is_data) rec->max_size = max_t(u64, rec->max_size, - info->extent_root->leafsize); + info->extent_root->nodesize); if (!allocated) { u32 item_size = sizeof(*ei); @@ -5796,7 +6654,7 @@ static int record_extent(struct btrfs_trans_handle *trans, } else { struct btrfs_disk_key copy_key;; - tback = (struct tree_backref *)back; + tback = to_tree_backref(back); bi = (struct btrfs_tree_block_info *)(ei + 1); memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi)); @@ -5825,7 +6683,7 @@ static int record_extent(struct btrfs_trans_handle *trans, u64 parent; int i; - dback = (struct data_backref *)back; + dback = to_data_backref(back); if (back->full_backref) parent = dback->parent; else @@ -5863,7 +6721,7 @@ static int record_extent(struct btrfs_trans_handle *trans, } else { u64 parent; - tback = (struct tree_backref *)back; + tback = to_tree_backref(back); if (back->full_backref) parent = tback->parent; else @@ -5874,23 +6732,13 @@ static int record_extent(struct btrfs_trans_handle *trans, parent, tback->root, 0, 0); fprintf(stderr, "adding new tree backref on " "start %llu len %llu parent %llu root %llu\n", - rec->start, rec->max_size, tback->parent, tback->root); + rec->start, rec->max_size, parent, tback->root); } - if (ret) - goto fail; fail: btrfs_release_path(path); return ret; } -struct extent_entry { - u64 bytenr; - u64 bytes; - int count; - int broken; - struct list_head list; -}; - static struct extent_entry *find_entry(struct list_head *entries, u64 bytenr, u64 bytes) { @@ -5945,16 +6793,16 @@ static struct extent_entry *find_most_right_entry(struct list_head *entries) return best; } -static int repair_ref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info, struct btrfs_path *path, +static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path, struct data_backref *dback, struct extent_entry *entry) { + struct btrfs_trans_handle *trans; struct btrfs_root *root; struct btrfs_file_extent_item *fi; struct extent_buffer *leaf; struct btrfs_key key; u64 bytenr, bytes; - int ret; + int ret, err; key.objectid = dback->root; key.type = BTRFS_ROOT_ITEM_KEY; @@ -6006,11 +6854,9 @@ static int repair_ref(struct btrfs_trans_handle *trans, btrfs_release_path(path); - /* - * Have to make sure that this root gets updated when we commit the - * transaction - */ - record_root_in_trans(trans, root); + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); /* * Ok we have the key of the file extent we want to fix, now we can cow @@ -6020,13 +6866,14 @@ static int repair_ref(struct btrfs_trans_handle *trans, if (ret < 0) { fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n", key.objectid, key.type, key.offset, ret); - return ret; + goto out; } if (ret > 0) { fprintf(stderr, "Well that's odd, we just found this key " "[%Lu, %u, %Lu]\n", key.objectid, key.type, key.offset); - return -EINVAL; + ret = -EINVAL; + goto out; } leaf = path->nodes[0]; fi = btrfs_item_ptr(leaf, path->slots[0], @@ -6039,7 +6886,8 @@ static int repair_ref(struct btrfs_trans_handle *trans, "system and send it to a btrfs developer so they can " "complete this functionality for bytenr %Lu\n", dback->disk_bytenr); - return -EINVAL; + ret = -EINVAL; + goto out; } if (dback->node.broken && dback->disk_bytenr != entry->bytenr) { @@ -6056,7 +6904,8 @@ static int repair_ref(struct btrfs_trans_handle *trans, "take a btrfs-image of this file system and " "send it to a btrfs developer, ref %Lu\n", dback->disk_bytenr); - return -EINVAL; + ret = -EINVAL; + goto out; } offset += off_diff; btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr); @@ -6070,7 +6919,8 @@ static int repair_ref(struct btrfs_trans_handle *trans, " take a btrfs-image of this file system and " "send it to a btrfs developer, ref %Lu\n", dback->disk_bytenr); - return -EINVAL; + ret = -EINVAL; + goto out; } offset += dback->disk_bytenr; @@ -6091,15 +6941,16 @@ static int repair_ref(struct btrfs_trans_handle *trans, else printf("ram bytes may be wrong?\n"); btrfs_mark_buffer_dirty(leaf); +out: + err = btrfs_commit_transaction(trans, root); btrfs_release_path(path); - return 0; + return ret ? ret : err; } -static int verify_backrefs(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info, struct btrfs_path *path, +static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path, struct extent_record *rec) { - struct extent_backref *back; + struct extent_backref *back, *tmp; struct data_backref *dback; struct extent_entry *entry, *best = NULL; LIST_HEAD(entries); @@ -6115,11 +6966,12 @@ static int verify_backrefs(struct btrfs_trans_handle *trans, if (rec->metadata) return 0; - list_for_each_entry(back, &rec->backrefs, list) { + rbtree_postorder_for_each_entry_safe(back, tmp, + &rec->backref_tree, node) { if (back->full_backref || !back->is_data) continue; - dback = (struct data_backref *)back; + dback = to_data_backref(back); /* * We only pay attention to backrefs that we found a real @@ -6241,11 +7093,12 @@ static int verify_backrefs(struct btrfs_trans_handle *trans, * Ok great we all agreed on an extent record, let's go find the real * references and fix up the ones that don't match. */ - list_for_each_entry(back, &rec->backrefs, list) { + rbtree_postorder_for_each_entry_safe(back, tmp, + &rec->backref_tree, node) { if (back->full_backref || !back->is_data) continue; - dback = (struct data_backref *)back; + dback = to_data_backref(back); /* * Still ignoring backrefs that don't have a real ref attached @@ -6258,7 +7111,7 @@ static int verify_backrefs(struct btrfs_trans_handle *trans, dback->disk_bytenr == best->bytenr) continue; - ret = repair_ref(trans, info, path, dback, best); + ret = repair_ref(info, path, dback, best); if (ret) goto out; } @@ -6309,7 +7162,7 @@ static int process_duplicates(struct btrfs_root *root, */ remove_cache_extent(extent_cache, &rec->cache); - good = list_entry(rec->dups.next, struct extent_record, list); + good = to_extent_record(rec->dups.next); list_del_init(&good->list); INIT_LIST_HEAD(&good->backrefs); INIT_LIST_HEAD(&good->dups); @@ -6359,15 +7212,15 @@ static int process_duplicates(struct btrfs_root *root, return good->num_duplicates ? 0 : 1; } -static int delete_duplicate_records(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int delete_duplicate_records(struct btrfs_root *root, struct extent_record *rec) { + struct btrfs_trans_handle *trans; LIST_HEAD(delete_list); struct btrfs_path *path; struct extent_record *tmp, *good, *n; int nr_del = 0; - int ret = 0; + int ret = 0, err; struct btrfs_key key; path = btrfs_alloc_path(); @@ -6386,7 +7239,7 @@ static int delete_duplicate_records(struct btrfs_trans_handle *trans, if (tmp->start + tmp->nr < good->start + good->nr) { fprintf(stderr, "Ok we have overlapping extents that " - "aren't completely covered by eachother, this " + "aren't completely covered by each other, this " "is going to require more careful thought. " "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n", tmp->start, tmp->nr, good->start, good->nr); @@ -6405,6 +7258,12 @@ static int delete_duplicate_records(struct btrfs_trans_handle *trans, } root = root->fs_info->extent_root; + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + list_for_each_entry(tmp, &delete_list, list) { if (tmp->found_rec == 0) continue; @@ -6424,18 +7283,20 @@ static int delete_duplicate_records(struct btrfs_trans_handle *trans, if (ret) { if (ret > 0) ret = -EINVAL; - goto out; + break; } ret = btrfs_del_item(trans, root, path); if (ret) - goto out; + break; btrfs_release_path(path); nr_del++; } - + err = btrfs_commit_transaction(trans, root); + if (err && !ret) + ret = err; out: while (!list_empty(&delete_list)) { - tmp = list_entry(delete_list.next, struct extent_record, list); + tmp = to_extent_record(delete_list.next); list_del_init(&tmp->list); if (tmp == rec) continue; @@ -6443,7 +7304,7 @@ out: } while (!list_empty(&rec->dups)) { - tmp = list_entry(rec->dups.next, struct extent_record, list); + tmp = to_extent_record(rec->dups.next); list_del_init(&tmp->list); free(tmp); } @@ -6456,14 +7317,13 @@ out: return ret ? ret : nr_del; } -static int find_possible_backrefs(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info, +static int find_possible_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path, struct cache_tree *extent_cache, struct extent_record *rec) { struct btrfs_root *root; - struct extent_backref *back; + struct extent_backref *back, *tmp; struct data_backref *dback; struct cache_extent *cache; struct btrfs_file_extent_item *fi; @@ -6471,12 +7331,13 @@ static int find_possible_backrefs(struct btrfs_trans_handle *trans, u64 bytenr, bytes; int ret; - list_for_each_entry(back, &rec->backrefs, list) { + rbtree_postorder_for_each_entry_safe(back, tmp, + &rec->backref_tree, node) { /* Don't care about full backrefs (poor unloved backrefs) */ if (back->full_backref || !back->is_data) continue; - dback = (struct data_backref *)back; + dback = to_data_backref(back); /* We found this one, we don't need to do a lookup */ if (dback->found_ref) @@ -6545,38 +7406,107 @@ static int find_possible_backrefs(struct btrfs_trans_handle *trans, } /* + * Record orphan data ref into corresponding root. + * + * Return 0 if the extent item contains data ref and recorded. + * Return 1 if the extent item contains no useful data ref + * On that case, it may contains only shared_dataref or metadata backref + * or the file extent exists(this should be handled by the extent bytenr + * recovery routine) + * Return <0 if something goes wrong. + */ +static int record_orphan_data_extents(struct btrfs_fs_info *fs_info, + struct extent_record *rec) +{ + struct btrfs_key key; + struct btrfs_root *dest_root; + struct extent_backref *back, *tmp; + struct data_backref *dback; + struct orphan_data_extent *orphan; + struct btrfs_path *path; + int recorded_data_ref = 0; + int ret = 0; + + if (rec->metadata) + return 1; + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + rbtree_postorder_for_each_entry_safe(back, tmp, + &rec->backref_tree, node) { + if (back->full_backref || !back->is_data || + !back->found_extent_tree) + continue; + dback = to_data_backref(back); + if (dback->found_ref) + continue; + key.objectid = dback->root; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + + dest_root = btrfs_read_fs_root(fs_info, &key); + + /* For non-exist root we just skip it */ + if (IS_ERR(dest_root) || !dest_root) + continue; + + key.objectid = dback->owner; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = dback->offset; + + ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0); + /* + * For ret < 0, it's OK since the fs-tree may be corrupted, + * we need to record it for inode/file extent rebuild. + * For ret > 0, we record it only for file extent rebuild. + * For ret == 0, the file extent exists but only bytenr + * mismatch, let the original bytenr fix routine to handle, + * don't record it. + */ + if (ret == 0) + continue; + ret = 0; + orphan = malloc(sizeof(*orphan)); + if (!orphan) { + ret = -ENOMEM; + goto out; + } + INIT_LIST_HEAD(&orphan->list); + orphan->root = dback->root; + orphan->objectid = dback->owner; + orphan->offset = dback->offset; + orphan->disk_bytenr = rec->cache.start; + orphan->disk_len = rec->cache.size; + list_add(&dest_root->orphan_data_extents, &orphan->list); + recorded_data_ref = 1; + } +out: + btrfs_free_path(path); + if (!ret) + return !recorded_data_ref; + else + return ret; +} + +/* * when an incorrect extent item is found, this will delete * all of the existing entries for it and recreate them * based on what the tree scan found. */ -static int fixup_extent_refs(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info, +static int fixup_extent_refs(struct btrfs_fs_info *info, struct cache_tree *extent_cache, struct extent_record *rec) { + struct btrfs_trans_handle *trans = NULL; int ret; struct btrfs_path *path; - struct list_head *cur = rec->backrefs.next; struct cache_extent *cache; - struct extent_backref *back; + struct extent_backref *back, *tmp; int allocated = 0; u64 flags = 0; - /* - * remember our flags for recreating the extent. - * FIXME, if we have cleared extent tree, we can not - * lookup extent info in extent tree. - */ - if (!init_extent_tree) { - ret = btrfs_lookup_extent_info(NULL, info->extent_root, - rec->start, rec->max_size, - rec->metadata, NULL, &flags); - if (ret < 0) - return ret; - } else { - if (rec->flag_block_full_backref) - flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; - } + if (rec->flag_block_full_backref) + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; path = btrfs_alloc_path(); if (!path) @@ -6590,17 +7520,22 @@ static int fixup_extent_refs(struct btrfs_trans_handle *trans, * them into the list if we find the backref so that * verify_backrefs can figure out what to do. */ - ret = find_possible_backrefs(trans, info, path, extent_cache, - rec); + ret = find_possible_backrefs(info, path, extent_cache, rec); if (ret < 0) goto out; } /* step one, make sure all of the backrefs agree */ - ret = verify_backrefs(trans, info, path, rec); + ret = verify_backrefs(info, path, rec); if (ret < 0) goto out; + trans = btrfs_start_transaction(info->extent_root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + /* step two, delete all the existing records */ ret = delete_extent_records(trans, info->extent_root, path, rec->start, rec->max_size); @@ -6617,10 +7552,8 @@ static int fixup_extent_refs(struct btrfs_trans_handle *trans, } /* step three, recreate all the refs we did find */ - while(cur != &rec->backrefs) { - back = list_entry(cur, struct extent_backref, list); - cur = cur->next; - + rbtree_postorder_for_each_entry_safe(back, tmp, + &rec->backref_tree, node) { /* * if we didn't find any references, don't create a * new extent record @@ -6628,6 +7561,7 @@ static int fixup_extent_refs(struct btrfs_trans_handle *trans, if (!back->found_ref) continue; + rec->bad_full_backref = 0; ret = record_extent(trans, info, path, rec, back, allocated, flags); allocated = 1; @@ -6635,10 +7569,77 @@ static int fixup_extent_refs(struct btrfs_trans_handle *trans, goto out; } out: + if (trans) { + int err = btrfs_commit_transaction(trans, info->extent_root); + if (!ret) + ret = err; + } + btrfs_free_path(path); return ret; } +static int fixup_extent_flags(struct btrfs_fs_info *fs_info, + struct extent_record *rec) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = fs_info->extent_root; + struct btrfs_path *path; + struct btrfs_extent_item *ei; + struct btrfs_key key; + u64 flags; + int ret = 0; + + key.objectid = rec->start; + if (rec->metadata) { + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = rec->info_level; + } else { + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = rec->max_size; + } + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + btrfs_free_path(path); + return PTR_ERR(trans); + } + + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret < 0) { + btrfs_free_path(path); + btrfs_commit_transaction(trans, root); + return ret; + } else if (ret) { + fprintf(stderr, "Didn't find extent for %llu\n", + (unsigned long long)rec->start); + btrfs_free_path(path); + btrfs_commit_transaction(trans, root); + return -ENOENT; + } + + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_extent_item); + flags = btrfs_extent_flags(path->nodes[0], ei); + if (rec->flag_block_full_backref) { + fprintf(stderr, "setting full backref on %llu\n", + (unsigned long long)key.objectid); + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + } else { + fprintf(stderr, "clearing full backref on %llu\n", + (unsigned long long)key.objectid); + flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF; + } + btrfs_set_extent_flags(path->nodes[0], ei, flags); + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_free_path(path); + return btrfs_commit_transaction(trans, root); +} + /* right now we only prune from the extent allocation tree */ static int prune_one_block(struct btrfs_trans_handle *trans, struct btrfs_fs_info *info, @@ -6708,20 +7709,27 @@ out: return ret; } -static int prune_corrupt_blocks(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info) +static int prune_corrupt_blocks(struct btrfs_fs_info *info) { + struct btrfs_trans_handle *trans = NULL; struct cache_extent *cache; struct btrfs_corrupt_block *corrupt; - cache = search_cache_extent(info->corrupt_blocks, 0); while (1) { + cache = search_cache_extent(info->corrupt_blocks, 0); if (!cache) break; + if (!trans) { + trans = btrfs_start_transaction(info->extent_root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); + } corrupt = container_of(cache, struct btrfs_corrupt_block, cache); prune_one_block(trans, info, corrupt); - cache = next_cache_extent(cache); + remove_cache_extent(info->corrupt_blocks, cache); } + if (trans) + return btrfs_commit_transaction(trans, info->extent_root); return 0; } @@ -6751,8 +7759,7 @@ static void reset_cached_block_groups(struct btrfs_fs_info *fs_info) } } -static int check_extent_refs(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int check_extent_refs(struct btrfs_root *root, struct cache_tree *extent_cache) { struct extent_record *rec; @@ -6761,6 +7768,7 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, int ret = 0; int fixed = 0; int had_dups = 0; + int recorded = 0; if (repair) { /* @@ -6772,30 +7780,35 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, cache = search_cache_extent(extent_cache, 0); while(cache) { rec = container_of(cache, struct extent_record, cache); - btrfs_pin_extent(root->fs_info, - rec->start, rec->max_size); + set_extent_dirty(root->fs_info->excluded_extents, + rec->start, + rec->start + rec->max_size - 1, + GFP_NOFS); cache = next_cache_extent(cache); } /* pin down all the corrupted blocks too */ cache = search_cache_extent(root->fs_info->corrupt_blocks, 0); while(cache) { - btrfs_pin_extent(root->fs_info, - cache->start, cache->size); + set_extent_dirty(root->fs_info->excluded_extents, + cache->start, + cache->start + cache->size - 1, + GFP_NOFS); cache = next_cache_extent(cache); } - prune_corrupt_blocks(trans, root->fs_info); + prune_corrupt_blocks(root->fs_info); reset_cached_block_groups(root->fs_info); } + reset_cached_block_groups(root->fs_info); + /* * We need to delete any duplicate entries we find first otherwise we * could mess up the extent tree when we have backrefs that actually * belong to a different extent item and not the weird duplicate one. */ while (repair && !list_empty(&duplicate_extents)) { - rec = list_entry(duplicate_extents.next, struct extent_record, - list); + rec = to_extent_record(duplicate_extents.next); list_del_init(&rec->list); /* Sometimes we can find a backref before we find an actual @@ -6807,7 +7820,7 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, */ if (process_duplicates(root, extent_cache, rec)) continue; - ret = delete_duplicate_records(trans, root, rec); + ret = delete_duplicate_records(root, rec); if (ret < 0) return ret; /* @@ -6823,7 +7836,10 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, return -EAGAIN; while(1) { + int cur_err = 0; + fixed = 0; + recorded = 0; cache = search_cache_extent(extent_cache, 0); if (!cache) break; @@ -6832,6 +7848,7 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, fprintf(stderr, "extent item %llu has multiple extent " "items\n", (unsigned long long)rec->start); err = 1; + cur_err = 1; } if (rec->refs != rec->extent_item_refs) { @@ -6841,47 +7858,97 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, fprintf(stderr, "extent item %llu, found %llu\n", (unsigned long long)rec->extent_item_refs, (unsigned long long)rec->refs); - if (!fixed && repair) { - ret = fixup_extent_refs(trans, root->fs_info, - extent_cache, rec); - if (ret) - goto repair_abort; - fixed = 1; + ret = record_orphan_data_extents(root->fs_info, rec); + if (ret < 0) + goto repair_abort; + if (ret == 0) { + recorded = 1; + } else { + /* + * we can't use the extent to repair file + * extent, let the fallback method handle it. + */ + if (!fixed && repair) { + ret = fixup_extent_refs( + root->fs_info, + extent_cache, rec); + if (ret) + goto repair_abort; + fixed = 1; + } } err = 1; - + cur_err = 1; } if (all_backpointers_checked(rec, 1)) { fprintf(stderr, "backpointer mismatch on [%llu %llu]\n", (unsigned long long)rec->start, (unsigned long long)rec->nr); - if (!fixed && repair) { - ret = fixup_extent_refs(trans, root->fs_info, + if (!fixed && !recorded && repair) { + ret = fixup_extent_refs(root->fs_info, extent_cache, rec); if (ret) goto repair_abort; fixed = 1; } - + cur_err = 1; err = 1; } if (!rec->owner_ref_checked) { fprintf(stderr, "owner ref check failed [%llu %llu]\n", (unsigned long long)rec->start, (unsigned long long)rec->nr); - if (!fixed && repair) { - ret = fixup_extent_refs(trans, root->fs_info, + if (!fixed && !recorded && repair) { + ret = fixup_extent_refs(root->fs_info, extent_cache, rec); if (ret) goto repair_abort; fixed = 1; } err = 1; + cur_err = 1; + } + if (rec->bad_full_backref) { + fprintf(stderr, "bad full backref, on [%llu]\n", + (unsigned long long)rec->start); + if (repair) { + ret = fixup_extent_flags(root->fs_info, rec); + if (ret) + goto repair_abort; + fixed = 1; + } + err = 1; + cur_err = 1; + } + /* + * Although it's not a extent ref's problem, we reuse this + * routine for error reporting. + * No repair function yet. + */ + if (rec->crossing_stripes) { + fprintf(stderr, + "bad metadata [%llu, %llu) crossing stripe boundary\n", + rec->start, rec->start + rec->max_size); + err = 1; + cur_err = 1; + } + + if (rec->wrong_chunk_type) { + fprintf(stderr, + "bad extent [%llu, %llu), type mismatch with chunk\n", + rec->start, rec->start + rec->max_size); + err = 1; + cur_err = 1; } remove_cache_extent(extent_cache, cache); free_all_extent_backrefs(rec); + if (!init_extent_tree && repair && (!cur_err || fixed)) + clear_extent_dirty(root->fs_info->excluded_extents, + rec->start, + rec->start + rec->max_size - 1, + GFP_NOFS); free(rec); } repair_abort: @@ -6890,7 +7957,19 @@ repair_abort: fprintf(stderr, "failed to repair damaged filesystem, aborting\n"); exit(1); } else if (!ret) { + struct btrfs_trans_handle *trans; + + root = root->fs_info->extent_root; + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto repair_abort; + } + btrfs_fix_block_accounting(trans, root); + ret = btrfs_commit_transaction(trans, root); + if (ret) + goto repair_abort; } if (err) fprintf(stderr, "repaired damaged extent references\n"); @@ -6940,6 +8019,7 @@ static int check_chunk_refs(struct chunk_record *chunk_rec, u64 devid; u64 offset; u64 length; + int metadump_v2 = 0; int i; int ret = 0; @@ -6952,7 +8032,8 @@ static int check_chunk_refs(struct chunk_record *chunk_rec, cache); if (chunk_rec->length != block_group_rec->offset || chunk_rec->offset != block_group_rec->objectid || - chunk_rec->type_flags != block_group_rec->flags) { + (!metadump_v2 && + chunk_rec->type_flags != block_group_rec->flags)) { if (!silent) fprintf(stderr, "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n", @@ -6986,6 +8067,9 @@ static int check_chunk_refs(struct chunk_record *chunk_rec, ret = 1; } + if (metadump_v2) + return ret; + length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length, chunk_rec->num_stripes); for (i = 0; i < chunk_rec->num_stripes; ++i) { @@ -7052,7 +8136,7 @@ int check_chunks(struct cache_tree *chunk_cache, cache); err = check_chunk_refs(chunk_rec, block_group_cache, dev_extent_cache, silent); - if (err) + if (err < 0) ret = err; if (err == 0 && good) list_add_tail(&chunk_rec->list, good); @@ -7151,7 +8235,7 @@ static int check_devices(struct rb_root *dev_cache, } static int add_root_item_to_list(struct list_head *head, - u64 objectid, u64 bytenr, + u64 objectid, u64 bytenr, u64 last_snapshot, u8 level, u8 drop_level, int level_size, struct btrfs_key *drop_key) { @@ -7165,6 +8249,7 @@ static int add_root_item_to_list(struct list_head *head, ri_rec->level = level; ri_rec->level_size = level_size; ri_rec->drop_level = drop_level; + ri_rec->last_snapshot = last_snapshot; if (drop_key) memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key)); list_add_tail(&ri_rec->list, head); @@ -7172,8 +8257,19 @@ static int add_root_item_to_list(struct list_head *head, return 0; } +static void free_root_item_list(struct list_head *list) +{ + struct root_item_record *ri_rec; + + while (!list_empty(list)) { + ri_rec = list_first_entry(list, struct root_item_record, + list); + list_del_init(&ri_rec->list); + free(ri_rec); + } +} + static int deal_root_from_list(struct list_head *list, - struct btrfs_trans_handle *trans, struct btrfs_root *root, struct block_info *bits, int bits_nr, @@ -7210,29 +8306,25 @@ static int deal_root_from_list(struct list_head *list, * one by one, otherwise we deal with node firstly which * can maximize readahead. */ - if (!init_extent_tree && !rec->drop_level) - goto skip; while (1) { - ret = run_next_block(trans, root, bits, bits_nr, &last, - pending, seen, reada, - nodes, extent_cache, - chunk_cache, dev_cache, - block_group_cache, + ret = run_next_block(root, bits, bits_nr, &last, + pending, seen, reada, nodes, + extent_cache, chunk_cache, + dev_cache, block_group_cache, dev_extent_cache, rec); if (ret != 0) break; } -skip: free_extent_buffer(buf); list_del(&rec->list); free(rec); + if (ret < 0) + break; } while (ret >= 0) { - ret = run_next_block(trans, root, bits, bits_nr, &last, - pending, seen, reada, - nodes, extent_cache, - chunk_cache, dev_cache, - block_group_cache, + ret = run_next_block(root, bits, bits_nr, &last, pending, seen, + reada, nodes, extent_cache, chunk_cache, + dev_cache, block_group_cache, dev_extent_cache, NULL); if (ret != 0) { if (ret > 0) @@ -7254,6 +8346,7 @@ static int check_chunks_and_extents(struct btrfs_root *root) struct cache_tree pending; struct cache_tree reada; struct cache_tree nodes; + struct extent_io_tree excluded_extents; struct cache_tree corrupt_blocks; struct btrfs_path path; struct btrfs_key key; @@ -7262,7 +8355,6 @@ static int check_chunks_and_extents(struct btrfs_root *root) struct block_info *bits; int bits_nr; struct extent_buffer *leaf; - struct btrfs_trans_handle *trans = NULL; int slot; struct btrfs_root_item ri; struct list_head dropping_trees; @@ -7283,15 +8375,12 @@ static int check_chunks_and_extents(struct btrfs_root *root) cache_tree_init(&nodes); cache_tree_init(&reada); cache_tree_init(&corrupt_blocks); + extent_io_tree_init(&excluded_extents); INIT_LIST_HEAD(&dropping_trees); INIT_LIST_HEAD(&normal_trees); if (repair) { - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - fprintf(stderr, "Error starting transaction\n"); - return PTR_ERR(trans); - } + root->fs_info->excluded_extents = &excluded_extents; root->fs_info->fsck_extent_cache = &extent_cache; root->fs_info->free_extent_hook = free_extent_hook; root->fs_info->corrupt_blocks = &corrupt_blocks; @@ -7304,19 +8393,24 @@ static int check_chunks_and_extents(struct btrfs_root *root) exit(1); } + if (ctx.progress_enabled) { + ctx.tp = TASK_EXTENTS; + task_start(ctx.info); + } + again: root1 = root->fs_info->tree_root; level = btrfs_header_level(root1->node); ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid, - root1->node->start, level, 0, - btrfs_level_size(root1, level), NULL); + root1->node->start, 0, level, 0, + root1->nodesize, NULL); if (ret < 0) goto out; root1 = root->fs_info->chunk_root; level = btrfs_header_level(root1->node); ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid, - root1->node->start, level, 0, - btrfs_level_size(root1, level), NULL); + root1->node->start, 0, level, 0, + root1->nodesize, NULL); if (ret < 0) goto out; btrfs_init_path(&path); @@ -7340,28 +8434,32 @@ again: btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]); if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) { unsigned long offset; + u64 last_snapshot; offset = btrfs_item_ptr_offset(leaf, path.slots[0]); read_extent_buffer(leaf, &ri, offset, sizeof(ri)); + last_snapshot = btrfs_root_last_snapshot(&ri); if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) { level = btrfs_root_level(&ri); - level_size = btrfs_level_size(root, level); + level_size = root->nodesize; ret = add_root_item_to_list(&normal_trees, found_key.objectid, - btrfs_root_bytenr(&ri), level, + btrfs_root_bytenr(&ri), + last_snapshot, level, 0, level_size, NULL); if (ret < 0) goto out; } else { level = btrfs_root_level(&ri); - level_size = btrfs_level_size(root, level); + level_size = root->nodesize; objectid = found_key.objectid; btrfs_disk_key_to_cpu(&found_key, &ri.drop_progress); ret = add_root_item_to_list(&dropping_trees, objectid, btrfs_root_bytenr(&ri), - level, ri.drop_level, + last_snapshot, level, + ri.drop_level, level_size, &found_key); if (ret < 0) goto out; @@ -7370,66 +8468,59 @@ again: path.slots[0]++; } btrfs_release_path(&path); - ret = deal_root_from_list(&normal_trees, trans, root, - bits, bits_nr, &pending, &seen, - &reada, &nodes, &extent_cache, + + /* + * check_block can return -EAGAIN if it fixes something, please keep + * this in mind when dealing with return values from these functions, if + * we get -EAGAIN we want to fall through and restart the loop. + */ + ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending, + &seen, &reada, &nodes, &extent_cache, &chunk_cache, &dev_cache, &block_group_cache, &dev_extent_cache); - if (ret < 0) + if (ret < 0) { + if (ret == -EAGAIN) + goto loop; goto out; - ret = deal_root_from_list(&dropping_trees, trans, root, - bits, bits_nr, &pending, &seen, - &reada, &nodes, &extent_cache, - &chunk_cache, &dev_cache, &block_group_cache, - &dev_extent_cache); - if (ret < 0) + } + ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr, + &pending, &seen, &reada, &nodes, + &extent_cache, &chunk_cache, &dev_cache, + &block_group_cache, &dev_extent_cache); + if (ret < 0) { + if (ret == -EAGAIN) + goto loop; goto out; - if (ret >= 0) - ret = check_extent_refs(trans, root, &extent_cache); - if (ret == -EAGAIN) { - ret = btrfs_commit_transaction(trans, root); - if (ret) - goto out; - - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto out; - } - - free_corrupt_blocks_tree(root->fs_info->corrupt_blocks); - free_extent_cache_tree(&seen); - free_extent_cache_tree(&pending); - free_extent_cache_tree(&reada); - free_extent_cache_tree(&nodes); - free_chunk_cache_tree(&chunk_cache); - free_block_group_tree(&block_group_cache); - free_device_cache_tree(&dev_cache); - free_device_extent_tree(&dev_extent_cache); - free_extent_record_cache(root->fs_info, &extent_cache); - goto again; } - err = check_chunks(&chunk_cache, &block_group_cache, + ret = check_chunks(&chunk_cache, &block_group_cache, &dev_extent_cache, NULL, NULL, NULL, 0); - if (err && !ret) - ret = err; + if (ret) { + if (ret == -EAGAIN) + goto loop; + err = ret; + } - err = check_devices(&dev_cache, &dev_extent_cache); - if (err && !ret) + ret = check_extent_refs(root, &extent_cache); + if (ret < 0) { + if (ret == -EAGAIN) + goto loop; + goto out; + } + + ret = check_devices(&dev_cache, &dev_extent_cache); + if (ret && err) ret = err; out: - if (trans) { - err = btrfs_commit_transaction(trans, root); - if (!ret) - ret = err; - } + task_stop(ctx.info); if (repair) { free_corrupt_blocks_tree(root->fs_info->corrupt_blocks); + extent_io_tree_cleanup(&excluded_extents); root->fs_info->fsck_extent_cache = NULL; root->fs_info->free_extent_hook = NULL; root->fs_info->corrupt_blocks = NULL; + root->fs_info->excluded_extents = NULL; } free(bits); free_chunk_cache_tree(&chunk_cache); @@ -7441,6 +8532,408 @@ out: free_extent_cache_tree(&reada); free_extent_cache_tree(&nodes); return ret; +loop: + free_corrupt_blocks_tree(root->fs_info->corrupt_blocks); + free_extent_cache_tree(&seen); + free_extent_cache_tree(&pending); + free_extent_cache_tree(&reada); + free_extent_cache_tree(&nodes); + free_chunk_cache_tree(&chunk_cache); + free_block_group_tree(&block_group_cache); + free_device_cache_tree(&dev_cache); + free_device_extent_tree(&dev_extent_cache); + free_extent_record_cache(root->fs_info, &extent_cache); + free_root_item_list(&normal_trees); + free_root_item_list(&dropping_trees); + extent_io_tree_cleanup(&excluded_extents); + goto again; +} + +/* + * Check backrefs of a tree block given by @bytenr or @eb. + * + * @root: the root containing the @bytenr or @eb + * @eb: tree block extent buffer, can be NULL + * @bytenr: bytenr of the tree block to search + * @level: tree level of the tree block + * @owner: owner of the tree block + * + * Return >0 for any error found and output error message + * Return 0 for no error found + */ +static int check_tree_block_ref(struct btrfs_root *root, + struct extent_buffer *eb, u64 bytenr, + int level, u64 owner) +{ + struct btrfs_key key; + struct btrfs_root *extent_root = root->fs_info->extent_root; + struct btrfs_path path; + struct btrfs_extent_item *ei; + struct btrfs_extent_inline_ref *iref; + struct extent_buffer *leaf; + unsigned long end; + unsigned long ptr; + int slot; + int skinny_level; + int type; + u32 nodesize = root->nodesize; + u32 item_size; + u64 offset; + int found_ref = 0; + int err = 0; + int ret; + + btrfs_init_path(&path); + key.objectid = bytenr; + if (btrfs_fs_incompat(root->fs_info, + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)) + key.type = BTRFS_METADATA_ITEM_KEY; + else + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = (u64)-1; + + /* Search for the backref in extent tree */ + ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); + if (ret < 0) { + err |= BACKREF_MISSING; + goto out; + } + ret = btrfs_previous_extent_item(extent_root, &path, bytenr); + if (ret) { + err |= BACKREF_MISSING; + goto out; + } + + leaf = path.nodes[0]; + slot = path.slots[0]; + btrfs_item_key_to_cpu(leaf, &key, slot); + + ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); + + if (key.type == BTRFS_METADATA_ITEM_KEY) { + skinny_level = (int)key.offset; + iref = (struct btrfs_extent_inline_ref *)(ei + 1); + } else { + struct btrfs_tree_block_info *info; + + info = (struct btrfs_tree_block_info *)(ei + 1); + skinny_level = btrfs_tree_block_level(leaf, info); + iref = (struct btrfs_extent_inline_ref *)(info + 1); + } + + if (eb) { + u64 header_gen; + u64 extent_gen; + + if (!(btrfs_extent_flags(leaf, ei) & + BTRFS_EXTENT_FLAG_TREE_BLOCK)) { + error( + "extent[%llu %u] backref type mismatch, missing bit: %llx", + key.objectid, nodesize, + BTRFS_EXTENT_FLAG_TREE_BLOCK); + err = BACKREF_MISMATCH; + } + header_gen = btrfs_header_generation(eb); + extent_gen = btrfs_extent_generation(leaf, ei); + if (header_gen != extent_gen) { + error( + "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu", + key.objectid, nodesize, header_gen, + extent_gen); + err = BACKREF_MISMATCH; + } + if (level != skinny_level) { + error( + "extent[%llu %u] level mismatch, wanted: %u, have: %u", + key.objectid, nodesize, level, skinny_level); + err = BACKREF_MISMATCH; + } + if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) { + error( + "extent[%llu %u] is referred by other roots than %llu", + key.objectid, nodesize, root->objectid); + err = BACKREF_MISMATCH; + } + } + + /* + * Iterate the extent/metadata item to find the exact backref + */ + item_size = btrfs_item_size_nr(leaf, slot); + ptr = (unsigned long)iref; + end = (unsigned long)ei + item_size; + while (ptr < end) { + iref = (struct btrfs_extent_inline_ref *)ptr; + type = btrfs_extent_inline_ref_type(leaf, iref); + offset = btrfs_extent_inline_ref_offset(leaf, iref); + + if (type == BTRFS_TREE_BLOCK_REF_KEY && + (offset == root->objectid || offset == owner)) { + found_ref = 1; + } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) { + /* Check if the backref points to valid referencer */ + found_ref = !check_tree_block_ref(root, NULL, offset, + level + 1, owner); + } + + if (found_ref) + break; + ptr += btrfs_extent_inline_ref_size(type); + } + + /* + * Inlined extent item doesn't have what we need, check + * TREE_BLOCK_REF_KEY + */ + if (!found_ref) { + btrfs_release_path(&path); + key.objectid = bytenr; + key.type = BTRFS_TREE_BLOCK_REF_KEY; + key.offset = root->objectid; + + ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); + if (!ret) + found_ref = 1; + } + if (!found_ref) + err |= BACKREF_MISSING; +out: + btrfs_release_path(&path); + if (eb && (err & BACKREF_MISSING)) + error("extent[%llu %u] backref lost (owner: %llu, level: %u)", + bytenr, nodesize, owner, level); + return err; +} + +/* + * Check EXTENT_DATA item, mainly for its dbackref in extent tree + * + * Return >0 any error found and output error message + * Return 0 for no error found + */ +static int check_extent_data_item(struct btrfs_root *root, + struct extent_buffer *eb, int slot) +{ + struct btrfs_file_extent_item *fi; + struct btrfs_path path; + struct btrfs_root *extent_root = root->fs_info->extent_root; + struct btrfs_key fi_key; + struct btrfs_key dbref_key; + struct extent_buffer *leaf; + struct btrfs_extent_item *ei; + struct btrfs_extent_inline_ref *iref; + struct btrfs_extent_data_ref *dref; + u64 owner; + u64 file_extent_gen; + u64 disk_bytenr; + u64 disk_num_bytes; + u64 extent_num_bytes; + u64 extent_flags; + u64 extent_gen; + u32 item_size; + unsigned long end; + unsigned long ptr; + int type; + u64 ref_root; + int found_dbackref = 0; + int err = 0; + int ret; + + btrfs_item_key_to_cpu(eb, &fi_key, slot); + fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); + file_extent_gen = btrfs_file_extent_generation(eb, fi); + + /* Nothing to check for hole and inline data extents */ + if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE || + btrfs_file_extent_disk_bytenr(eb, fi) == 0) + return 0; + + disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi); + disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); + extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi); + + /* Check unaligned disk_num_bytes and num_bytes */ + if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) { + error( +"file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u", + fi_key.objectid, fi_key.offset, disk_num_bytes, + root->sectorsize); + err |= BYTES_UNALIGNED; + } else { + data_bytes_allocated += disk_num_bytes; + } + if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) { + error( +"file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u", + fi_key.objectid, fi_key.offset, extent_num_bytes, + root->sectorsize); + err |= BYTES_UNALIGNED; + } else { + data_bytes_referenced += extent_num_bytes; + } + owner = btrfs_header_owner(eb); + + /* Check the extent item of the file extent in extent tree */ + btrfs_init_path(&path); + dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi); + dbref_key.type = BTRFS_EXTENT_ITEM_KEY; + dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi); + + ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0); + if (ret) { + err |= BACKREF_MISSING; + goto error; + } + + leaf = path.nodes[0]; + slot = path.slots[0]; + ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); + + extent_flags = btrfs_extent_flags(leaf, ei); + extent_gen = btrfs_extent_generation(leaf, ei); + + if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) { + error( + "extent[%llu %llu] backref type mismatch, wanted bit: %llx", + disk_bytenr, disk_num_bytes, + BTRFS_EXTENT_FLAG_DATA); + err |= BACKREF_MISMATCH; + } + + if (file_extent_gen < extent_gen) { + error( +"extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu", + disk_bytenr, disk_num_bytes, file_extent_gen, + extent_gen); + err |= BACKREF_MISMATCH; + } + + /* Check data backref inside that extent item */ + item_size = btrfs_item_size_nr(leaf, path.slots[0]); + iref = (struct btrfs_extent_inline_ref *)(ei + 1); + ptr = (unsigned long)iref; + end = (unsigned long)ei + item_size; + while (ptr < end) { + iref = (struct btrfs_extent_inline_ref *)ptr; + type = btrfs_extent_inline_ref_type(leaf, iref); + dref = (struct btrfs_extent_data_ref *)(&iref->offset); + + if (type == BTRFS_EXTENT_DATA_REF_KEY) { + ref_root = btrfs_extent_data_ref_root(leaf, dref); + if (ref_root == owner || ref_root == root->objectid) + found_dbackref = 1; + } else if (type == BTRFS_SHARED_DATA_REF_KEY) { + found_dbackref = !check_tree_block_ref(root, NULL, + btrfs_extent_inline_ref_offset(leaf, iref), + 0, owner); + } + + if (found_dbackref) + break; + ptr += btrfs_extent_inline_ref_size(type); + } + + /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */ + if (!found_dbackref) { + btrfs_release_path(&path); + + btrfs_init_path(&path); + dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi); + dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY; + dbref_key.offset = hash_extent_data_ref(root->objectid, + fi_key.objectid, fi_key.offset); + + ret = btrfs_search_slot(NULL, root->fs_info->extent_root, + &dbref_key, &path, 0, 0); + if (!ret) + found_dbackref = 1; + } + + if (!found_dbackref) + err |= BACKREF_MISSING; +error: + btrfs_release_path(&path); + if (err & BACKREF_MISSING) { + error("data extent[%llu %llu] backref lost", + disk_bytenr, disk_num_bytes); + } + return err; +} + +/* + * Get real tree block level for the case like shared block + * Return >= 0 as tree level + * Return <0 for error + */ +static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr) +{ + struct extent_buffer *eb; + struct btrfs_path path; + struct btrfs_key key; + struct btrfs_extent_item *ei; + u64 flags; + u64 transid; + u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); + u8 backref_level; + u8 header_level; + int ret; + + /* Search extent tree for extent generation and level */ + key.objectid = bytenr; + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = (u64)-1; + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0); + if (ret < 0) + goto release_out; + ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr); + if (ret < 0) + goto release_out; + if (ret > 0) { + ret = -ENOENT; + goto release_out; + } + + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + ei = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_extent_item); + flags = btrfs_extent_flags(path.nodes[0], ei); + if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) { + ret = -ENOENT; + goto release_out; + } + + /* Get transid for later read_tree_block() check */ + transid = btrfs_extent_generation(path.nodes[0], ei); + + /* Get backref level as one source */ + if (key.type == BTRFS_METADATA_ITEM_KEY) { + backref_level = key.offset; + } else { + struct btrfs_tree_block_info *info; + + info = (struct btrfs_tree_block_info *)(ei + 1); + backref_level = btrfs_tree_block_level(path.nodes[0], info); + } + btrfs_release_path(&path); + + /* Get level from tree block as an alternative source */ + eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid); + if (!extent_buffer_uptodate(eb)) { + free_extent_buffer(eb); + return -EIO; + } + header_level = btrfs_header_level(eb); + free_extent_buffer(eb); + + if (header_level != backref_level) + return -EIO; + return header_level; + +release_out: + btrfs_release_path(&path); + return ret; } static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans, @@ -7460,7 +8953,7 @@ static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans, goto init; } c = btrfs_alloc_free_block(trans, root, - btrfs_level_size(root, 0), + root->nodesize, root->root_key.objectid, &disk_key, level, 0, 0); if (IS_ERR(c)) { @@ -7517,7 +9010,7 @@ static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info, struct btrfs_root_item *ri; struct btrfs_key key; u64 bytenr; - u32 leafsize; + u32 nodesize; int level = btrfs_header_level(eb); int nritems; int ret; @@ -7534,7 +9027,7 @@ static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info, btrfs_pin_extent(fs_info, eb->start, eb->len); - leafsize = btrfs_super_leafsize(fs_info->super_copy); + nodesize = btrfs_super_nodesize(fs_info->super_copy); nritems = btrfs_header_nritems(eb); for (i = 0; i < nritems; i++) { if (level == 0) { @@ -7556,8 +9049,8 @@ static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info, * just pass in extent_root. */ tmp = read_tree_block(fs_info->extent_root, bytenr, - leafsize, 0); - if (!tmp) { + nodesize, 0); + if (!extent_buffer_uptodate(tmp)) { fprintf(stderr, "Error reading root block\n"); return -EIO; } @@ -7570,13 +9063,13 @@ static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info, /* If we aren't the tree root don't read the block */ if (level == 1 && !tree_root) { - btrfs_pin_extent(fs_info, bytenr, leafsize); + btrfs_pin_extent(fs_info, bytenr, nodesize); continue; } tmp = read_tree_block(fs_info->extent_root, bytenr, - leafsize, 0); - if (!tmp) { + nodesize, 0); + if (!extent_buffer_uptodate(tmp)) { fprintf(stderr, "Error reading tree block\n"); return -EIO; } @@ -7865,7 +9358,7 @@ static int reinit_extent_tree(struct btrfs_trans_handle *trans, ret = reset_balance(trans, fs_info); if (ret) - fprintf(stderr, "error reseting the pending balance\n"); + fprintf(stderr, "error resetting the pending balance\n"); return ret; } @@ -7993,8 +9486,142 @@ static int populate_csum(struct btrfs_trans_handle *trans, return ret; } -static int fill_csum_tree(struct btrfs_trans_handle *trans, - struct btrfs_root *csum_root) +static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans, + struct btrfs_root *csum_root, + struct btrfs_root *cur_root) +{ + struct btrfs_path *path; + struct btrfs_key key; + struct extent_buffer *node; + struct btrfs_file_extent_item *fi; + char *buf = NULL; + u64 start = 0; + u64 len = 0; + int slot = 0; + int ret = 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + buf = malloc(cur_root->fs_info->csum_root->sectorsize); + if (!buf) { + ret = -ENOMEM; + goto out; + } + + key.objectid = 0; + key.offset = 0; + key.type = 0; + + ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0); + if (ret < 0) + goto out; + /* Iterate all regular file extents and fill its csum */ + while (1) { + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + + if (key.type != BTRFS_EXTENT_DATA_KEY) + goto next; + node = path->nodes[0]; + slot = path->slots[0]; + fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item); + if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG) + goto next; + start = btrfs_file_extent_disk_bytenr(node, fi); + len = btrfs_file_extent_disk_num_bytes(node, fi); + + ret = populate_csum(trans, csum_root, buf, start, len); + if (ret == -EEXIST) + ret = 0; + if (ret < 0) + goto out; +next: + /* + * TODO: if next leaf is corrupted, jump to nearest next valid + * leaf. + */ + ret = btrfs_next_item(cur_root, path); + if (ret < 0) + goto out; + if (ret > 0) { + ret = 0; + goto out; + } + } + +out: + btrfs_free_path(path); + free(buf); + return ret; +} + +static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans, + struct btrfs_root *csum_root) +{ + struct btrfs_fs_info *fs_info = csum_root->fs_info; + struct btrfs_path *path; + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *cur_root; + struct extent_buffer *node; + struct btrfs_key key; + int slot = 0; + int ret = 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = BTRFS_FS_TREE_OBJECTID; + key.offset = 0; + key.type = BTRFS_ROOT_ITEM_KEY; + + ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); + if (ret < 0) + goto out; + if (ret > 0) { + ret = -ENOENT; + goto out; + } + + while (1) { + node = path->nodes[0]; + slot = path->slots[0]; + btrfs_item_key_to_cpu(node, &key, slot); + if (key.objectid > BTRFS_LAST_FREE_OBJECTID) + goto out; + if (key.type != BTRFS_ROOT_ITEM_KEY) + goto next; + if (!is_fstree(key.objectid)) + goto next; + key.offset = (u64)-1; + + cur_root = btrfs_read_fs_root(fs_info, &key); + if (IS_ERR(cur_root) || !cur_root) { + fprintf(stderr, "Fail to read fs/subvol tree: %lld\n", + key.objectid); + goto out; + } + ret = fill_csum_tree_from_one_fs_root(trans, csum_root, + cur_root); + if (ret < 0) + goto out; +next: + ret = btrfs_next_item(tree_root, path); + if (ret > 0) { + ret = 0; + goto out; + } + if (ret < 0) + goto out; + } + +out: + btrfs_free_path(path); + return ret; +} + +static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *csum_root) { struct btrfs_root *extent_root = csum_root->fs_info->extent_root; struct btrfs_path *path; @@ -8062,17 +9689,22 @@ static int fill_csum_tree(struct btrfs_trans_handle *trans, return ret; } -struct root_item_info { - /* level of the root */ - u8 level; - /* number of nodes at this level, must be 1 for a root */ - int node_count; - u64 bytenr; - u64 gen; - struct cache_extent cache_extent; -}; - -static struct cache_tree *roots_info_cache = NULL; +/* + * Recalculate the csum and put it into the csum tree. + * + * Extent tree init will wipe out all the extent info, so in that case, we + * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we + * will use fs/subvol trees to init the csum tree. + */ +static int fill_csum_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *csum_root, + int search_fs_tree) +{ + if (search_fs_tree) + return fill_csum_tree_from_fs(trans, csum_root); + else + return fill_csum_tree_from_extent(trans, csum_root); +} static void free_roots_info_cache(void) { @@ -8163,11 +9795,11 @@ static int build_roots_info_cache(struct btrfs_fs_info *info) iref = (struct btrfs_extent_inline_ref *)(ei + 1); level = found_key.offset; } else { - struct btrfs_tree_block_info *info; + struct btrfs_tree_block_info *binfo; - info = (struct btrfs_tree_block_info *)(ei + 1); - iref = (struct btrfs_extent_inline_ref *)(info + 1); - level = btrfs_tree_block_level(leaf, info); + binfo = (struct btrfs_tree_block_info *)(ei + 1); + iref = (struct btrfs_extent_inline_ref *)(binfo + 1); + level = btrfs_tree_block_level(leaf, binfo); } /* @@ -8375,6 +10007,8 @@ again: if (found_key.type != BTRFS_ROOT_ITEM_KEY) goto next; + if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID) + goto next; ret = maybe_repair_root_item(info, path, &found_key, trans ? 0 : 1); @@ -8395,8 +10029,9 @@ next: ret = 0; out: free_roots_info_cache(); - if (path) - btrfs_free_path(path); + btrfs_free_path(path); + if (trans) + btrfs_commit_transaction(trans, info->tree_root); if (ret < 0) return ret; @@ -8405,17 +10040,25 @@ out: const char * const cmd_check_usage[] = { "btrfs check [options] ", - "Check an unmounted btrfs filesystem.", + "Check structural integrity of a filesystem (unmounted).", + "Check structural integrity of an unmounted filesystem. Verify internal", + "trees' consistency and item connectivity. In the repair mode try to", + "fix the problems found.", + "WARNING: the repair mode is considered dangerous", "", "-s|--super use this superblock copy", - "-b|--backup use the backup root copy", + "-b|--backup use the first valid backup root copy", "--repair try to repair the filesystem", + "--readonly run in read-only mode (default)", "--init-csum-tree create a new CRC tree", "--init-extent-tree create a new extent tree", - "--check-data-csum verify checkums of data blocks", - "--qgroup-report print a report on qgroup consistency", - "--subvol-extents print subvolume extents and sharing state", - "--tree-root use the given bytenr for the tree root", + "--check-data-csum verify checksums of data blocks", + "-Q|--qgroup-report print a report on qgroup consistency", + "-E|--subvol-extents ", + " print subvolume extents and sharing state", + "-r|--tree-root use the given bytenr for the tree root", + "--chunk-root use the given bytenr for the chunk tree root", + "-p|--progress indicate progress", NULL }; @@ -8427,31 +10070,42 @@ int cmd_check(int argc, char **argv) u64 bytenr = 0; u64 subvolid = 0; u64 tree_root_bytenr = 0; + u64 chunk_root_bytenr = 0; char uuidbuf[BTRFS_UUID_UNPARSED_SIZE]; int ret; u64 num; int init_csum_tree = 0; + int readonly = 0; int qgroup_report = 0; + int qgroups_repaired = 0; enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE; while(1) { int c; - int option_index = 0; + enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM, + GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM, + GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE }; static const struct option long_options[] = { - { "super", 1, NULL, 's' }, - { "repair", 0, NULL, 0 }, - { "init-csum-tree", 0, NULL, 0 }, - { "init-extent-tree", 0, NULL, 0 }, - { "check-data-csum", 0, NULL, 0 }, - { "backup", 0, NULL, 0 }, - { "subvol-extents", 1, NULL, 'E' }, - { "qgroup-report", 0, NULL, 'Q' }, - { "tree-root", 1, NULL, 'r' }, + { "super", required_argument, NULL, 's' }, + { "repair", no_argument, NULL, GETOPT_VAL_REPAIR }, + { "readonly", no_argument, NULL, GETOPT_VAL_READONLY }, + { "init-csum-tree", no_argument, NULL, + GETOPT_VAL_INIT_CSUM }, + { "init-extent-tree", no_argument, NULL, + GETOPT_VAL_INIT_EXTENT }, + { "check-data-csum", no_argument, NULL, + GETOPT_VAL_CHECK_CSUM }, + { "backup", no_argument, NULL, 'b' }, + { "subvol-extents", required_argument, NULL, 'E' }, + { "qgroup-report", no_argument, NULL, 'Q' }, + { "tree-root", required_argument, NULL, 'r' }, + { "chunk-root", required_argument, NULL, + GETOPT_VAL_CHUNK_TREE }, + { "progress", no_argument, NULL, 'p' }, { NULL, 0, NULL, 0} }; - c = getopt_long(argc, argv, "as:br:", long_options, - &option_index); + c = getopt_long(argc, argv, "as:br:p", long_options, NULL); if (c < 0) break; switch(c) { @@ -8480,33 +10134,55 @@ int cmd_check(int argc, char **argv) case 'r': tree_root_bytenr = arg_strtou64(optarg); break; + case GETOPT_VAL_CHUNK_TREE: + chunk_root_bytenr = arg_strtou64(optarg); + break; + case 'p': + ctx.progress_enabled = true; + break; case '?': case 'h': usage(cmd_check_usage); + case GETOPT_VAL_REPAIR: + printf("enabling repair mode\n"); + repair = 1; + ctree_flags |= OPEN_CTREE_WRITES; + break; + case GETOPT_VAL_READONLY: + readonly = 1; + break; + case GETOPT_VAL_INIT_CSUM: + printf("Creating a new CRC tree\n"); + init_csum_tree = 1; + repair = 1; + ctree_flags |= OPEN_CTREE_WRITES; + break; + case GETOPT_VAL_INIT_EXTENT: + init_extent_tree = 1; + ctree_flags |= (OPEN_CTREE_WRITES | + OPEN_CTREE_NO_BLOCK_GROUPS); + repair = 1; + break; + case GETOPT_VAL_CHECK_CSUM: + check_data_csum = 1; + break; } - if (option_index == 1) { - printf("enabling repair mode\n"); - repair = 1; - ctree_flags |= OPEN_CTREE_WRITES; - } else if (option_index == 2) { - printf("Creating a new CRC tree\n"); - init_csum_tree = 1; - repair = 1; - ctree_flags |= OPEN_CTREE_WRITES; - } else if (option_index == 3) { - init_extent_tree = 1; - ctree_flags |= (OPEN_CTREE_WRITES | - OPEN_CTREE_NO_BLOCK_GROUPS); - repair = 1; - } else if (option_index == 4) { - check_data_csum = 1; - } - } - argc = argc - optind; - - if (check_argc_exact(argc, 1)) + } + + if (check_argc_exact(argc - optind, 1)) usage(cmd_check_usage); + if (ctx.progress_enabled) { + ctx.tp = TASK_NOTHING; + ctx.info = task_init(print_status_check, print_status_return, &ctx); + } + + /* This check is the only reason for --readonly to exist */ + if (readonly && repair) { + fprintf(stderr, "Repair options are not compatible with --readonly\n"); + exit(1); + } + radix_tree_init(); cache_tree_init(&root_cache); @@ -8524,13 +10200,14 @@ int cmd_check(int argc, char **argv) ctree_flags |= OPEN_CTREE_PARTIAL; info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr, - ctree_flags); + chunk_root_bytenr, ctree_flags); if (!info) { fprintf(stderr, "Couldn't open file system\n"); ret = -EIO; goto err_out; } + global_info = info; root = info->fs_root; /* @@ -8556,7 +10233,7 @@ int cmd_check(int argc, char **argv) uuidbuf); ret = qgroup_verify_all(info); if (ret == 0) - print_qgroup_report(1); + report_qgroups(1); goto close_out; } if (subvolid) { @@ -8601,7 +10278,8 @@ int cmd_check(int argc, char **argv) goto close_out; } - ret = fill_csum_tree(trans, info->csum_root); + ret = fill_csum_tree(trans, info->csum_root, + init_extent_tree); if (ret) { fprintf(stderr, "crc refilling failed\n"); return -EIO; @@ -8626,7 +10304,8 @@ int cmd_check(int argc, char **argv) goto close_out; } - fprintf(stderr, "checking extents\n"); + if (!ctx.progress_enabled) + fprintf(stderr, "checking extents\n"); ret = check_chunks_and_extents(root); if (ret) fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n"); @@ -8647,7 +10326,12 @@ int cmd_check(int argc, char **argv) goto close_out; } - fprintf(stderr, "checking free space cache\n"); + if (!ctx.progress_enabled) { + if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) + fprintf(stderr, "checking free space tree\n"); + else + fprintf(stderr, "checking free space cache\n"); + } ret = check_space_cache(root); if (ret) goto out; @@ -8660,7 +10344,8 @@ int cmd_check(int argc, char **argv) */ no_holes = btrfs_fs_incompat(root->fs_info, BTRFS_FEATURE_INCOMPAT_NO_HOLES); - fprintf(stderr, "checking fs roots\n"); + if (!ctx.progress_enabled) + fprintf(stderr, "checking fs roots\n"); ret = check_fs_roots(root, &root_cache); if (ret) goto out; @@ -8702,6 +10387,10 @@ int cmd_check(int argc, char **argv) err = qgroup_verify_all(info); if (err) goto out; + report_qgroups(0); + err = repair_qgroups(info, &qgroups_repaired); + if (err) + goto out; } if (!list_empty(&root->fs_info->recow_ebs)) { @@ -8709,7 +10398,10 @@ int cmd_check(int argc, char **argv) ret = 1; } out: - print_qgroup_report(0); + /* Don't override original ret */ + if (!ret && qgroups_repaired) + ret = qgroups_repaired; + if (found_old_backref) { /* * there was a disk format change when mixed * backref was in testing tree. The old format @@ -8735,11 +10427,14 @@ out: printf("file data blocks allocated: %llu\n referenced %llu\n", (unsigned long long)data_bytes_allocated, (unsigned long long)data_bytes_referenced); - printf("%s\n", BTRFS_BUILD_VERSION); + free_qgroup_counts(); free_root_recs_tree(&root_cache); close_out: close_ctree(root); err_out: + if (ctx.progress_enabled) + task_deinit(ctx.info); + return ret; }