X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=cmds-check.c;h=0a48535f908122507e2ec3c8fb4d808f2063be10;hb=05640939ea24c609c7c2badc76ebba9ec6a4896d;hp=8b96aa6ffc348133cee837788453da771b3696dd;hpb=bb06029f9b277060d1c9b6a578a10967bb7f4682;p=platform%2Fupstream%2Fbtrfs-progs.git diff --git a/cmds-check.c b/cmds-check.c index 8b96aa6..0a48535 100644 --- a/cmds-check.c +++ b/cmds-check.c @@ -30,16 +30,32 @@ #include "repair.h" #include "disk-io.h" #include "print-tree.h" +#include "task-utils.h" #include "transaction.h" #include "utils.h" #include "commands.h" #include "free-space-cache.h" +#include "free-space-tree.h" #include "btrfsck.h" #include "qgroup-verify.h" #include "rbtree-utils.h" #include "backref.h" #include "ulist.h" +enum task_position { + TASK_EXTENTS, + TASK_FREE_SPACE, + TASK_FS_ROOTS, + TASK_NOTHING, /* have to be the last element */ +}; + +struct task_ctx { + int progress_enabled; + enum task_position tp; + + struct task_info *info; +}; + static u64 bytes_used = 0; static u64 total_csum_bytes = 0; static u64 total_btree_bytes = 0; @@ -55,6 +71,42 @@ static int repair = 0; static int no_holes = 0; static int init_extent_tree = 0; static int check_data_csum = 0; +static struct btrfs_fs_info *global_info; +static struct task_ctx ctx = { 0 }; + +static void *print_status_check(void *p) +{ + struct task_ctx *priv = p; + const char work_indicator[] = { '.', 'o', 'O', 'o' }; + uint32_t count = 0; + static char *task_position_string[] = { + "checking extents", + "checking free space cache", + "checking fs roots", + }; + + task_period_start(priv->info, 1000 /* 1s */); + + if (priv->tp == TASK_NOTHING) + return NULL; + + while (1) { + printf("%s [%c]\r", task_position_string[priv->tp], + work_indicator[count % 4]); + count++; + fflush(stdout); + task_period_wait(priv->info); + } + return NULL; +} + +static int print_status_return(void *p) +{ + printf("\n"); + fflush(stdout); + + return 0; +} struct extent_backref { struct list_head list; @@ -83,7 +135,8 @@ struct data_backref { /* * Much like data_backref, just removed the undetermined members * and change it to use list_head. - * Stored in the root->orphan_data_extents list + * During extent scan, it is stored in root->orphan_data_extent. + * During fs tree scan, it is then moved to inode_rec->orphan_data_extents. */ struct orphan_data_extent { struct list_head list; @@ -118,12 +171,15 @@ struct extent_record { u64 info_objectid; u32 num_duplicates; u8 info_level; + int flag_block_full_backref; unsigned int found_rec:1; unsigned int content_checked:1; unsigned int owner_ref_checked:1; unsigned int is_root:1; unsigned int metadata:1; - unsigned int flag_block_full_backref:1; + unsigned int bad_full_backref:1; + unsigned int crossing_stripes:1; + unsigned int wrong_chunk_type:1; }; struct inode_backref { @@ -144,6 +200,7 @@ struct root_item_record { struct list_head list; u64 objectid; u64 bytenr; + u64 last_snapshot; u8 level; u8 drop_level; int level_size; @@ -182,7 +239,7 @@ static u64 first_extent_gap(struct rb_root *holes) return hole->start; } -int compare_hole(struct rb_node *node1, struct rb_node *node2) +static int compare_hole(struct rb_node *node1, struct rb_node *node2) { struct file_extent_hole *hole1; struct file_extent_hole *hole2; @@ -283,8 +340,10 @@ static int del_file_extent_hole(struct rb_root *holes, { struct file_extent_hole *hole; struct file_extent_hole tmp; - struct file_extent_hole prev; - struct file_extent_hole next; + u64 prev_start = 0; + u64 prev_len = 0; + u64 next_start = 0; + u64 next_len = 0; struct rb_node *node; int have_prev = 0; int have_next = 0; @@ -304,24 +363,24 @@ static int del_file_extent_hole(struct rb_root *holes, * split(s) if they exists. */ if (start > hole->start) { - prev.start = hole->start; - prev.len = start - hole->start; + prev_start = hole->start; + prev_len = start - hole->start; have_prev = 1; } if (hole->start + hole->len > start + len) { - next.start = start + len; - next.len = hole->start + hole->len - start - len; + next_start = start + len; + next_len = hole->start + hole->len - start - len; have_next = 1; } rb_erase(node, holes); free(hole); if (have_prev) { - ret = add_file_extent_hole(holes, prev.start, prev.len); + ret = add_file_extent_hole(holes, prev_start, prev_len); if (ret < 0) return ret; } if (have_next) { - ret = add_file_extent_hole(holes, next.start, next.len); + ret = add_file_extent_hole(holes, next_start, next_len); if (ret < 0) return ret; } @@ -383,6 +442,7 @@ struct inode_record { u64 extent_start; u64 extent_end; struct rb_root holes; + struct list_head orphan_extents; u32 refs; }; @@ -401,6 +461,7 @@ struct inode_record { #define I_ERR_ODD_CSUM_ITEM (1 << 11) #define I_ERR_SOME_CSUM_MISSING (1 << 12) #define I_ERR_LINK_COUNT_WRONG (1 << 13) +#define I_ERR_FILE_EXTENT_ORPHAN (1 << 14) struct root_backref { struct list_head list; @@ -506,20 +567,77 @@ static struct inode_record *clone_inode_rec(struct inode_record *orig_rec) struct inode_record *rec; struct inode_backref *backref; struct inode_backref *orig; + struct inode_backref *tmp; + struct orphan_data_extent *src_orphan; + struct orphan_data_extent *dst_orphan; size_t size; + int ret; rec = malloc(sizeof(*rec)); + if (!rec) + return ERR_PTR(-ENOMEM); memcpy(rec, orig_rec, sizeof(*rec)); rec->refs = 1; INIT_LIST_HEAD(&rec->backrefs); + INIT_LIST_HEAD(&rec->orphan_extents); + rec->holes = RB_ROOT; list_for_each_entry(orig, &orig_rec->backrefs, list) { size = sizeof(*orig) + orig->namelen + 1; backref = malloc(size); + if (!backref) { + ret = -ENOMEM; + goto cleanup; + } memcpy(backref, orig, size); list_add_tail(&backref->list, &rec->backrefs); } + list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) { + dst_orphan = malloc(sizeof(*dst_orphan)); + if (!dst_orphan) { + ret = -ENOMEM; + goto cleanup; + } + memcpy(dst_orphan, src_orphan, sizeof(*src_orphan)); + list_add_tail(&dst_orphan->list, &rec->orphan_extents); + } + ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes); + BUG_ON(ret < 0); + return rec; + +cleanup: + if (!list_empty(&rec->backrefs)) + list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) { + list_del(&orig->list); + free(orig); + } + + if (!list_empty(&rec->orphan_extents)) + list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) { + list_del(&orig->list); + free(orig); + } + + free(rec); + + return ERR_PTR(ret); +} + +static void print_orphan_data_extents(struct list_head *orphan_extents, + u64 objectid) +{ + struct orphan_data_extent *orphan; + + if (list_empty(orphan_extents)) + return; + printf("The following data extent is lost in tree %llu:\n", + objectid); + list_for_each_entry(orphan, orphan_extents, list) { + printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n", + orphan->objectid, orphan->offset, orphan->disk_bytenr, + orphan->disk_len); + } } static void print_inode_error(struct btrfs_root *root, struct inode_record *rec) @@ -566,20 +684,31 @@ static void print_inode_error(struct btrfs_root *root, struct inode_record *rec) fprintf(stderr, ", some csum missing"); if (errors & I_ERR_LINK_COUNT_WRONG) fprintf(stderr, ", link count wrong"); + if (errors & I_ERR_FILE_EXTENT_ORPHAN) + fprintf(stderr, ", orphan file extent"); fprintf(stderr, "\n"); + /* Print the orphan extents if needed */ + if (errors & I_ERR_FILE_EXTENT_ORPHAN) + print_orphan_data_extents(&rec->orphan_extents, root->objectid); + /* Print the holes if needed */ if (errors & I_ERR_FILE_EXTENT_DISCOUNT) { struct file_extent_hole *hole; struct rb_node *node; + int found = 0; node = rb_first(&rec->holes); fprintf(stderr, "Found file extent holes:\n"); while (node) { + found = 1; hole = rb_entry(node, struct file_extent_hole, node); - fprintf(stderr, "\tstart: %llu, len:%llu\n", + fprintf(stderr, "\tstart: %llu, len: %llu\n", hole->start, hole->len); node = rb_next(node); } + if (!found) + fprintf(stderr, "\tstart: 0, len: %llu\n", + round_up(rec->isize, root->sectorsize)); } } @@ -628,18 +757,27 @@ static struct inode_record *get_inode_rec(struct cache_tree *inode_cache, rec = node->data; if (mod && rec->refs > 1) { node->data = clone_inode_rec(rec); + if (IS_ERR(node->data)) + return node->data; rec->refs--; rec = node->data; } } else if (mod) { rec = calloc(1, sizeof(*rec)); + if (!rec) + return ERR_PTR(-ENOMEM); rec->ino = ino; rec->extent_start = (u64)-1; rec->refs = 1; INIT_LIST_HEAD(&rec->backrefs); + INIT_LIST_HEAD(&rec->orphan_extents); rec->holes = RB_ROOT; node = malloc(sizeof(*node)); + if (!node) { + free(rec); + return ERR_PTR(-ENOMEM); + } node->cache.start = ino; node->cache.size = 1; node->data = rec; @@ -648,11 +786,24 @@ static struct inode_record *get_inode_rec(struct cache_tree *inode_cache, rec->found_link = 1; ret = insert_cache_extent(inode_cache, &node->cache); - BUG_ON(ret); + if (ret) + return ERR_PTR(-EEXIST); } return rec; } +static void free_orphan_data_extents(struct list_head *orphan_extents) +{ + struct orphan_data_extent *orphan; + + while (!list_empty(orphan_extents)) { + orphan = list_entry(orphan_extents->next, + struct orphan_data_extent, list); + list_del(&orphan->list); + free(orphan); + } +} + static void free_inode_rec(struct inode_record *rec) { struct inode_backref *backref; @@ -666,6 +817,7 @@ static void free_inode_rec(struct inode_record *rec) list_del(&backref->list); free(backref); } + free_orphan_data_extents(&rec->orphan_extents); free_file_extent_holes(&rec->holes); free(rec); } @@ -694,7 +846,8 @@ static void maybe_free_inode_rec(struct cache_tree *inode_cache, if (backref->found_dir_item && backref->found_dir_index) { if (backref->filetype != filetype) backref->errors |= REF_ERR_FILETYPE_UNMATCH; - if (!backref->errors && backref->found_inode_ref) { + if (!backref->errors && backref->found_inode_ref && + rec->nlink == rec->found_link) { list_del(&backref->list); free(backref); } @@ -800,6 +953,8 @@ static struct inode_backref *get_inode_backref(struct inode_record *rec, } backref = malloc(sizeof(*backref) + namelen + 1); + if (!backref) + return NULL; memset(backref, 0, sizeof(*backref)); backref->dir = dir; backref->namelen = namelen; @@ -818,7 +973,9 @@ static int add_inode_backref(struct cache_tree *inode_cache, struct inode_backref *backref; rec = get_inode_rec(inode_cache, ino, 1); + BUG_ON(IS_ERR(rec)); backref = get_inode_backref(rec, name, namelen, dir); + BUG_ON(!backref); if (errors) backref->errors |= errors; if (itemtype == BTRFS_DIR_INDEX_KEY) { @@ -972,6 +1129,7 @@ again: ins = node; } else { ins = malloc(sizeof(*ins)); + BUG_ON(!ins); ins->cache.start = node->cache.start; ins->cache.size = node->cache.size; ins->data = rec; @@ -980,6 +1138,7 @@ again: ret = insert_cache_extent(dst, &ins->cache); if (ret == -EEXIST) { conflict = get_inode_rec(dst, rec->ino, 1); + BUG_ON(IS_ERR(conflict)); merge_inode_recs(rec, conflict, dst); if (rec->checked) { conflict->checked = 1; @@ -1007,6 +1166,7 @@ again: maybe_free_inode_rec(dst, dst_node->current); } dst_node->current = get_inode_rec(dst, current_ino, 1); + BUG_ON(IS_ERR(dst_node->current)); } return 0; } @@ -1044,6 +1204,8 @@ static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs) struct shared_node *node; node = calloc(1, sizeof(*node)); + if (!node) + return -ENOMEM; node->cache.start = bytenr; node->cache.size = 1; cache_tree_init(&node->root_cache); @@ -1051,8 +1213,8 @@ static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs) node->refs = refs; ret = insert_cache_extent(shared, &node->cache); - BUG_ON(ret); - return 0; + + return ret; } static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs, @@ -1060,6 +1222,7 @@ static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs, { struct shared_node *node; struct shared_node *dest; + int ret; if (level == wc->active_node) return 0; @@ -1067,7 +1230,8 @@ static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs, BUG_ON(wc->active_node <= level); node = find_shared_node(&wc->shared, bytenr); if (!node) { - add_shared_node(&wc->shared, bytenr, refs); + ret = add_shared_node(&wc->shared, bytenr, refs); + BUG_ON(ret); node = find_shared_node(&wc->shared, bytenr); wc->nodes[level] = node; wc->active_node = level; @@ -1481,7 +1645,16 @@ static int process_file_extent(struct btrfs_root *root, } rec->extent_end = key->offset + num_bytes; - if (disk_bytenr > 0) { + /* + * The data reloc tree will copy full extents into its inode and then + * copy the corresponding csums. Because the extent it copied could be + * a preallocated extent that hasn't been written to yet there may be no + * csums to copy, ergo we won't have csums for our file extent. This is + * ok so just don't bother checking csums if the inode belongs to the + * data reloc tree. + */ + if (disk_bytenr > 0 && + btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) { u64 found; if (btrfs_file_extent_compression(eb, fi)) num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); @@ -1538,6 +1711,7 @@ static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb, } active_node->current = get_inode_rec(inode_cache, key.objectid, 1); + BUG_ON(IS_ERR(active_node->current)); } switch (key.type) { case BTRFS_DIR_ITEM_KEY: @@ -1707,7 +1881,7 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, reada_walk_down(root, cur, path->slots[*level]); next = read_tree_block(root, bytenr, blocksize, ptr_gen); - if (!next) { + if (!extent_buffer_uptodate(next)) { struct btrfs_key node_key; btrfs_node_key_to_cpu(path->nodes[*level], @@ -1851,6 +2025,39 @@ static int repair_inode_orphan_item(struct btrfs_trans_handle *trans, return ret; } +static int repair_inode_nbytes(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct inode_record *rec) +{ + struct btrfs_inode_item *ei; + struct btrfs_key key; + int ret = 0; + + key.objectid = rec->ino; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret) { + if (ret > 0) + ret = -ENOENT; + goto out; + } + + /* Since ret == 0, no need to check anything */ + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size); + btrfs_mark_buffer_dirty(path->nodes[0]); + rec->errors &= ~I_ERR_FILE_NBYTES_WRONG; + printf("reset nbytes for ino %llu root %llu\n", + rec->ino, root->root_key.objectid); +out: + btrfs_release_path(path); + return ret; +} + static int add_missing_dir_index(struct btrfs_root *root, struct cache_tree *inode_cache, struct inode_record *rec, @@ -1905,6 +2112,7 @@ static int add_missing_dir_index(struct btrfs_root *root, backref->found_dir_index = 1; dir_rec = get_inode_rec(inode_cache, backref->dir, 0); + BUG_ON(IS_ERR(dir_rec)); if (!dir_rec) return 0; dir_rec->found_size += backref->namelen; @@ -2234,7 +2442,7 @@ static int reset_nlink(struct btrfs_trans_handle *trans, list_for_each_entry(backref, &rec->backrefs, list) { ret = btrfs_add_link(trans, root, rec->ino, backref->dir, backref->name, backref->namelen, - backref->ref_type, &backref->index, 1); + backref->filetype, &backref->index, 1); if (ret < 0) goto out; } @@ -2298,13 +2506,17 @@ static int repair_inode_nlinks(struct btrfs_trans_handle *trans, BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino, mode); if (ret < 0) { - fprintf(stderr, "Failed to create '%s' dir: %s", + fprintf(stderr, "Failed to create '%s' dir: %s\n", dir_name, strerror(-ret)); goto out; } ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino, namebuf, namelen, type, NULL, 1); - if (ret == -EEXIST) { + /* + * Add ".INO" suffix several times to handle case where + * "FILENAME.INO" is already taken by another file. + */ + while (ret == -EEXIST) { /* * Conflicting file name, add ".INO" as suffix * +1 for '.' */ @@ -2322,7 +2534,7 @@ static int repair_inode_nlinks(struct btrfs_trans_handle *trans, } if (ret < 0) { fprintf(stderr, - "Failed to link the inode %llu to %s dir: %s", + "Failed to link the inode %llu to %s dir: %s\n", rec->ino, dir_name, strerror(-ret)); goto out; } @@ -2336,9 +2548,14 @@ static int repair_inode_nlinks(struct btrfs_trans_handle *trans, printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n", namelen, namebuf, dir_name); } - rec->errors &= ~I_ERR_LINK_COUNT_WRONG; printf("Fixed the nlink of inode %llu\n", rec->ino); out: + /* + * Clear the flag anyway, or we will loop forever for the same inode + * as it will not be removed from the bad inode list and the dead loop + * happens. + */ + rec->errors &= ~I_ERR_LINK_COUNT_WRONG; btrfs_release_path(path); return ret; } @@ -2424,12 +2641,6 @@ static int repair_inode_no_item(struct btrfs_trans_handle *trans, int type_recovered = 0; int ret = 0; - /* - * TODO: - * 1. salvage data from existing file extent and - * punch hole to keep fi ext consistent. - * 2. salvage data from extent tree - */ printf("Trying to rebuild inode:%llu\n", rec->ino); type_recovered = !find_file_type(rec, &filetype); @@ -2443,9 +2654,7 @@ static int repair_inode_no_item(struct btrfs_trans_handle *trans, * For undetermined one, use FILE as fallback. * * TODO: - * 1. If found extent belong to it in extent tree, it must be FILE - * Need extra hook in extent tree scan. - * 2. If found backref(inode_index/item is already handled) to it, + * 1. If found backref(inode_index/item is already handled) to it, * it must be DIR. * Need new inode-inode ref structure to allow search for that. */ @@ -2457,7 +2666,10 @@ static int repair_inode_no_item(struct btrfs_trans_handle *trans, } else if (rec->found_dir_item) { type_recovered = 1; filetype = BTRFS_FT_DIR; - } else { + } else if (!list_empty(&rec->orphan_extents)) { + type_recovered = 1; + filetype = BTRFS_FT_REG_FILE; + } else{ printf("Can't determint the filetype for inode %llu, assume it is a normal file\n", rec->ino); type_recovered = 1; @@ -2487,6 +2699,107 @@ out: return ret; } +static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct inode_record *rec) +{ + struct orphan_data_extent *orphan; + struct orphan_data_extent *tmp; + int ret = 0; + + list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) { + /* + * Check for conflicting file extents + * + * Here we don't know whether the extents is compressed or not, + * so we can only assume it not compressed nor data offset, + * and use its disk_len as extent length. + */ + ret = btrfs_get_extent(NULL, root, path, orphan->objectid, + orphan->offset, orphan->disk_len, 0); + btrfs_release_path(path); + if (ret < 0) + goto out; + if (!ret) { + fprintf(stderr, + "orphan extent (%llu, %llu) conflicts, delete the orphan\n", + orphan->disk_bytenr, orphan->disk_len); + ret = btrfs_free_extent(trans, + root->fs_info->extent_root, + orphan->disk_bytenr, orphan->disk_len, + 0, root->objectid, orphan->objectid, + orphan->offset); + if (ret < 0) + goto out; + } + ret = btrfs_insert_file_extent(trans, root, orphan->objectid, + orphan->offset, orphan->disk_bytenr, + orphan->disk_len, orphan->disk_len); + if (ret < 0) + goto out; + + /* Update file size info */ + rec->found_size += orphan->disk_len; + if (rec->found_size == rec->nbytes) + rec->errors &= ~I_ERR_FILE_NBYTES_WRONG; + + /* Update the file extent hole info too */ + ret = del_file_extent_hole(&rec->holes, orphan->offset, + orphan->disk_len); + if (ret < 0) + goto out; + if (RB_EMPTY_ROOT(&rec->holes)) + rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT; + + list_del(&orphan->list); + free(orphan); + } + rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN; +out: + return ret; +} + +static int repair_inode_discount_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct inode_record *rec) +{ + struct rb_node *node; + struct file_extent_hole *hole; + int found = 0; + int ret = 0; + + node = rb_first(&rec->holes); + + while (node) { + found = 1; + hole = rb_entry(node, struct file_extent_hole, node); + ret = btrfs_punch_hole(trans, root, rec->ino, + hole->start, hole->len); + if (ret < 0) + goto out; + ret = del_file_extent_hole(&rec->holes, hole->start, + hole->len); + if (ret < 0) + goto out; + if (RB_EMPTY_ROOT(&rec->holes)) + rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT; + node = rb_first(&rec->holes); + } + /* special case for a file losing all its file extent */ + if (!found) { + ret = btrfs_punch_hole(trans, root, rec->ino, 0, + round_up(rec->isize, root->sectorsize)); + if (ret < 0) + goto out; + } + printf("Fixed discount file extents for inode: %llu in root: %llu\n", + rec->ino, root->objectid); +out: + return ret; +} + static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec) { struct btrfs_trans_handle *trans; @@ -2496,7 +2809,10 @@ static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec) if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG | I_ERR_NO_ORPHAN_ITEM | I_ERR_LINK_COUNT_WRONG | - I_ERR_NO_INODE_ITEM))) + I_ERR_NO_INODE_ITEM | + I_ERR_FILE_EXTENT_ORPHAN | + I_ERR_FILE_EXTENT_DISCOUNT| + I_ERR_FILE_NBYTES_WRONG))) return rec->errors; path = btrfs_alloc_path(); @@ -2518,12 +2834,18 @@ static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec) if (rec->errors & I_ERR_NO_INODE_ITEM) ret = repair_inode_no_item(trans, root, path, rec); + if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN) + ret = repair_inode_orphan_extent(trans, root, path, rec); + if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT) + ret = repair_inode_discount_extent(trans, root, path, rec); if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG) ret = repair_inode_isize(trans, root, path, rec); if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM) ret = repair_inode_orphan_item(trans, root, path, rec); if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG) ret = repair_inode_nlinks(trans, root, path, rec); + if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG) + ret = repair_inode_nbytes(trans, root, path, rec); btrfs_commit_transaction(trans, root); btrfs_free_path(path); return ret; @@ -2615,6 +2937,7 @@ static int check_inode_recs(struct btrfs_root *root, return err; rec = get_inode_rec(inode_cache, root_dirid, 0); + BUG_ON(IS_ERR(rec)); if (rec) { ret = check_root_dir(rec); if (ret) { @@ -2722,13 +3045,16 @@ static struct root_record *get_root_rec(struct cache_tree *root_cache, rec = container_of(cache, struct root_record, cache); } else { rec = calloc(1, sizeof(*rec)); + if (!rec) + return ERR_PTR(-ENOMEM); rec->objectid = objectid; INIT_LIST_HEAD(&rec->backrefs); rec->cache.start = objectid; rec->cache.size = 1; ret = insert_cache_extent(root_cache, &rec->cache); - BUG_ON(ret); + if (ret) + return ERR_PTR(-EEXIST); } return rec; } @@ -2748,8 +3074,9 @@ static struct root_backref *get_root_backref(struct root_record *rec, return backref; } - backref = malloc(sizeof(*backref) + namelen + 1); - memset(backref, 0, sizeof(*backref)); + backref = calloc(1, sizeof(*backref) + namelen + 1); + if (!backref) + return NULL; backref->ref_root = ref_root; backref->dir = dir; backref->index = index; @@ -2787,7 +3114,9 @@ static int add_root_backref(struct cache_tree *root_cache, struct root_backref *backref; rec = get_root_rec(root_cache, root_id); + BUG_ON(IS_ERR(rec)); backref = get_root_backref(rec, ref_root, dir, index, name, namelen); + BUG_ON(!backref); backref->errors |= errors; @@ -2892,6 +3221,7 @@ static int check_root_refs(struct btrfs_root *root, int errors = 0; rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID); + BUG_ON(IS_ERR(rec)); rec->found_ref = 1; /* fixme: this can not detect circular references */ @@ -2913,6 +3243,7 @@ static int check_root_refs(struct btrfs_root *root, ref_root = get_root_rec(root_cache, backref->ref_root); + BUG_ON(IS_ERR(ref_root)); if (ref_root->found_ref > 0) continue; @@ -3131,34 +3462,6 @@ out_free_path: return ret; } -static void print_orphan_data_extents(struct list_head *orphan_extents, - u64 objectid) -{ - struct orphan_data_extent *orphan; - - if (list_empty(orphan_extents)) - return; - printf("The following data extent is lost in tree %llu:\n", - objectid); - list_for_each_entry(orphan, orphan_extents, list) { - printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n", - orphan->objectid, orphan->offset, orphan->disk_bytenr, - orphan->disk_len); - } -} - -static void free_orphan_data_extents(struct list_head *orphan_extents) -{ - struct orphan_data_extent *orphan; - - while (!list_empty(orphan_extents)) { - orphan = list_entry(orphan_extents->next, - struct orphan_data_extent, list); - list_del(&orphan->list); - free(orphan); - } -} - static int check_fs_root(struct btrfs_root *root, struct cache_tree *root_cache, struct walk_control *wc) @@ -3172,6 +3475,8 @@ static int check_fs_root(struct btrfs_root *root, struct root_record *rec; struct btrfs_root_item *root_item = &root->root_item; struct cache_tree corrupt_blocks; + struct orphan_data_extent *orphan; + struct orphan_data_extent *tmp; enum btrfs_tree_block_status status; /* @@ -3182,8 +3487,10 @@ static int check_fs_root(struct btrfs_root *root, */ cache_tree_init(&corrupt_blocks); root->fs_info->corrupt_blocks = &corrupt_blocks; + if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { rec = get_root_rec(root_cache, root->root_key.objectid); + BUG_ON(IS_ERR(rec)); if (btrfs_root_refs(root_item) > 0) rec->found_root_item = 1; } @@ -3193,6 +3500,18 @@ static int check_fs_root(struct btrfs_root *root, cache_tree_init(&root_node.root_cache); cache_tree_init(&root_node.inode_cache); + /* Move the orphan extent record to corresponding inode_record */ + list_for_each_entry_safe(orphan, tmp, + &root->orphan_data_extents, list) { + struct inode_record *inode; + + inode = get_inode_rec(&root_node.inode_cache, orphan->objectid, + 1); + BUG_ON(IS_ERR(inode)); + inode->errors |= I_ERR_FILE_EXTENT_ORPHAN; + list_move(&orphan->list, &inode->orphan_extents); + } + level = btrfs_header_level(root->node); memset(wc->nodes, 0, sizeof(wc->nodes)); wc->nodes[level] = &root_node; @@ -3290,7 +3609,6 @@ skip_walking: free_corrupt_blocks_tree(&corrupt_blocks); root->fs_info->corrupt_blocks = NULL; - print_orphan_data_extents(&root->orphan_data_extents, root->objectid); free_orphan_data_extents(&root->orphan_data_extents); return ret; } @@ -3315,6 +3633,11 @@ static int check_fs_roots(struct btrfs_root *root, int ret; int err = 0; + if (ctx.progress_enabled) { + ctx.tp = TASK_FS_ROOTS; + task_start(ctx.info); + } + /* * Just in case we made any changes to the extent tree that weren't * reflected into the free space cache yet. @@ -3391,6 +3714,8 @@ out: if (!cache_tree_empty(&wc.shared)) fprintf(stderr, "warning line %d\n", __LINE__); + task_stop(ctx.info); + return err; } @@ -3532,7 +3857,6 @@ static void free_extent_record_cache(struct btrfs_fs_info *fs_info, if (!cache) break; rec = container_of(cache, struct extent_record, cache); - btrfs_unpin_extent(fs_info, rec->start, rec->max_size); remove_cache_extent(extent_cache, cache); free_all_extent_backrefs(rec); free(rec); @@ -3544,7 +3868,9 @@ static int maybe_free_extent_rec(struct cache_tree *extent_cache, { if (rec->content_checked && rec->owner_ref_checked && rec->extent_item_refs == rec->refs && rec->refs > 0 && - rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0)) { + rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) && + !rec->bad_full_backref && !rec->crossing_stripes && + !rec->wrong_chunk_type) { remove_cache_extent(extent_cache, &rec->cache); free_all_extent_backrefs(rec); list_del_init(&rec->list); @@ -3859,11 +4185,11 @@ again: * Attempt to fix basic block failures. If we can't fix it for whatever reason * then just return -EIO. */ -static int try_to_fix_bad_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int try_to_fix_bad_block(struct btrfs_root *root, struct extent_buffer *buf, enum btrfs_tree_block_status status) { + struct btrfs_trans_handle *trans; struct ulist *roots; struct ulist_node *node; struct btrfs_root *search_root; @@ -3880,7 +4206,7 @@ static int try_to_fix_bad_block(struct btrfs_trans_handle *trans, if (!path) return -EIO; - ret = btrfs_find_all_roots(trans, root->fs_info, buf->start, + ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots); if (ret) { btrfs_free_path(path); @@ -3899,7 +4225,12 @@ static int try_to_fix_bad_block(struct btrfs_trans_handle *trans, break; } - record_root_in_trans(trans, search_root); + + trans = btrfs_start_transaction(search_root, 0); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + break; + } path->lowest_level = btrfs_header_level(buf); path->skip_check_block = 1; @@ -3910,23 +4241,26 @@ static int try_to_fix_bad_block(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1); if (ret) { ret = -EIO; + btrfs_commit_transaction(trans, search_root); break; } if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER) ret = fix_key_order(trans, search_root, path); else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS) ret = fix_item_offset(trans, search_root, path); - if (ret) + if (ret) { + btrfs_commit_transaction(trans, search_root); break; + } btrfs_release_path(path); + btrfs_commit_transaction(trans, search_root); } ulist_free(roots); btrfs_free_path(path); return ret; } -static int check_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int check_block(struct btrfs_root *root, struct cache_tree *extent_cache, struct extent_buffer *buf, u64 flags) { @@ -3962,8 +4296,7 @@ static int check_block(struct btrfs_trans_handle *trans, if (status != BTRFS_TREE_BLOCK_CLEAN) { if (repair) - status = try_to_fix_bad_block(trans, root, buf, - status); + status = try_to_fix_bad_block(root, buf, status); if (status != BTRFS_TREE_BLOCK_CLEAN) { ret = -EIO; fprintf(stderr, "bad block %llu\n", @@ -4022,6 +4355,9 @@ static struct tree_backref *alloc_tree_backref(struct extent_record *rec, u64 parent, u64 root) { struct tree_backref *ref = malloc(sizeof(*ref)); + + if (!ref) + return NULL; memset(&ref->node, 0, sizeof(ref->node)); if (parent > 0) { ref->parent = parent; @@ -4078,6 +4414,9 @@ static struct data_backref *alloc_data_backref(struct extent_record *rec, u64 max_size) { struct data_backref *ref = malloc(sizeof(*ref)); + + if (!ref) + return NULL; memset(&ref->node, 0, sizeof(ref->node)); ref->node.is_data = 1; @@ -4101,6 +4440,56 @@ static struct data_backref *alloc_data_backref(struct extent_record *rec, return ref; } +/* Check if the type of extent matches with its chunk */ +static void check_extent_type(struct extent_record *rec) +{ + struct btrfs_block_group_cache *bg_cache; + + bg_cache = btrfs_lookup_first_block_group(global_info, rec->start); + if (!bg_cache) + return; + + /* data extent, check chunk directly*/ + if (!rec->metadata) { + if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA)) + rec->wrong_chunk_type = 1; + return; + } + + /* metadata extent, check the obvious case first */ + if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM | + BTRFS_BLOCK_GROUP_METADATA))) { + rec->wrong_chunk_type = 1; + return; + } + + /* + * Check SYSTEM extent, as it's also marked as metadata, we can only + * make sure it's a SYSTEM extent by its backref + */ + if (!list_empty(&rec->backrefs)) { + struct extent_backref *node; + struct tree_backref *tback; + u64 bg_type; + + node = list_entry(rec->backrefs.next, struct extent_backref, + list); + if (node->is_data) { + /* tree block shouldn't have data backref */ + rec->wrong_chunk_type = 1; + return; + } + tback = container_of(node, struct tree_backref, node); + + if (tback->root == BTRFS_CHUNK_TREE_OBJECTID) + bg_type = BTRFS_BLOCK_GROUP_SYSTEM; + else + bg_type = BTRFS_BLOCK_GROUP_METADATA; + if (!(bg_cache->flags & bg_type)) + rec->wrong_chunk_type = 1; + } +} + static int add_extent_rec(struct cache_tree *extent_cache, struct btrfs_key *parent_key, u64 parent_gen, u64 start, u64 nr, u64 extent_item_refs, @@ -4184,10 +4573,22 @@ static int add_extent_rec(struct cache_tree *extent_cache, if (rec->max_size < max_size) rec->max_size = max_size; + /* + * A metadata extent can't cross stripe_len boundary, otherwise + * kernel scrub won't be able to handle it. + * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check + * it. + */ + if (metadata && check_crossing_stripes(rec->start, + rec->max_size)) + rec->crossing_stripes = 1; + check_extent_type(rec); maybe_free_extent_rec(extent_cache, rec); return ret; } rec = malloc(sizeof(*rec)); + if (!rec) + return -ENOMEM; rec->start = start; rec->max_size = max_size; rec->nr = max(nr, max_size); @@ -4196,6 +4597,10 @@ static int add_extent_rec(struct cache_tree *extent_cache, rec->owner_ref_checked = 0; rec->num_duplicates = 0; rec->metadata = metadata; + rec->flag_block_full_backref = -1; + rec->bad_full_backref = 0; + rec->crossing_stripes = 0; + rec->wrong_chunk_type = 0; INIT_LIST_HEAD(&rec->backrefs); INIT_LIST_HEAD(&rec->dups); INIT_LIST_HEAD(&rec->list); @@ -4234,6 +4639,11 @@ static int add_extent_rec(struct cache_tree *extent_cache, rec->content_checked = 1; rec->owner_ref_checked = 1; } + + if (metadata) + if (check_crossing_stripes(rec->start, rec->max_size)) + rec->crossing_stripes = 1; + check_extent_type(rec); return ret; } @@ -4259,8 +4669,10 @@ static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr, } back = find_tree_backref(rec, parent, root); - if (!back) + if (!back) { back = alloc_tree_backref(rec, parent, root); + BUG_ON(!back); + } if (found_ref) { if (back->node.found_ref) { @@ -4281,6 +4693,7 @@ static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr, } back->node.found_extent_tree = 1; } + check_extent_type(rec); maybe_free_extent_rec(extent_cache, rec); return 0; } @@ -4318,9 +4731,11 @@ static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr, */ back = find_data_backref(rec, parent, root, owner, offset, found_ref, bytenr, max_size); - if (!back) + if (!back) { back = alloc_data_backref(rec, parent, root, owner, offset, max_size); + BUG_ON(!back); + } if (found_ref) { BUG_ON(num_refs != 1); @@ -4550,14 +4965,12 @@ struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf, ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); num_stripes = btrfs_chunk_num_stripes(leaf, ptr); - rec = malloc(btrfs_chunk_record_size(num_stripes)); + rec = calloc(1, btrfs_chunk_record_size(num_stripes)); if (!rec) { fprintf(stderr, "memory allocation failed\n"); exit(-1); } - memset(rec, 0, btrfs_chunk_record_size(num_stripes)); - INIT_LIST_HEAD(&rec->list); INIT_LIST_HEAD(&rec->dextents); rec->bg_rec = NULL; @@ -4655,12 +5068,11 @@ btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key, struct btrfs_block_group_item *ptr; struct block_group_record *rec; - rec = malloc(sizeof(*rec)); + rec = calloc(1, sizeof(*rec)); if (!rec) { fprintf(stderr, "memory allocation failed\n"); exit(-1); } - memset(rec, 0, sizeof(*rec)); rec->cache.start = key->objectid; rec->cache.size = key->offset; @@ -4704,12 +5116,11 @@ btrfs_new_device_extent_record(struct extent_buffer *leaf, struct device_extent_record *rec; struct btrfs_dev_extent *ptr; - rec = malloc(sizeof(*rec)); + rec = calloc(1, sizeof(*rec)); if (!rec) { fprintf(stderr, "memory allocation failed\n"); exit(-1); } - memset(rec, 0, sizeof(*rec)); rec->cache.objectid = key->objectid; rec->cache.start = key->offset; @@ -4797,6 +5208,10 @@ static int process_extent_item(struct btrfs_root *root, ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item); refs = btrfs_extent_refs(eb, ei); + if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK) + metadata = 1; + else + metadata = 0; add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes, refs, 0, 0, 0, metadata, 1, num_bytes); @@ -5041,6 +5456,11 @@ static int check_space_cache(struct btrfs_root *root) return 0; } + if (ctx.progress_enabled) { + ctx.tp = TASK_FREE_SPACE; + task_start(ctx.info); + } + while (1) { cache = btrfs_lookup_first_block_group(root->fs_info, start); if (!cache) @@ -5057,53 +5477,41 @@ static int check_space_cache(struct btrfs_root *root) btrfs_remove_free_space_cache(cache); } - ret = load_free_space_cache(root->fs_info, cache); - if (!ret) - continue; - - ret = verify_space_cache(root, cache); - if (ret) { - fprintf(stderr, "cache appears valid but isnt %Lu\n", - cache->key.objectid); - error++; + if (btrfs_fs_compat_ro(root->fs_info, + BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) { + ret = exclude_super_stripes(root, cache); + if (ret) { + fprintf(stderr, "could not exclude super stripes: %s\n", + strerror(-ret)); + error++; + continue; + } + ret = load_free_space_tree(root->fs_info, cache); + free_excluded_extents(root, cache); + if (ret < 0) { + fprintf(stderr, "could not load free space tree: %s\n", + strerror(-ret)); + error++; + continue; + } + error += ret; + } else { + ret = load_free_space_cache(root->fs_info, cache); + if (!ret) + continue; } - } - return error ? -EINVAL : 0; -} - -static int read_extent_data(struct btrfs_root *root, char *data, - u64 logical, u64 *len, int mirror) -{ - u64 offset = 0; - struct btrfs_multi_bio *multi = NULL; - struct btrfs_fs_info *info = root->fs_info; - struct btrfs_device *device; - int ret = 0; - u64 max_len = *len; - - ret = btrfs_map_block(&info->mapping_tree, READ, logical, len, - &multi, mirror, NULL); - if (ret) { - fprintf(stderr, "Couldn't map the block %llu\n", - logical + offset); - goto err; + ret = verify_space_cache(root, cache); + if (ret) { + fprintf(stderr, "cache appears valid but isnt %Lu\n", + cache->key.objectid); + error++; + } } - device = multi->stripes[0].dev; - if (device->fd == 0) - goto err; - if (*len > max_len) - *len = max_len; + task_stop(ctx.info); - ret = pread64(device->fd, data, *len, multi->stripes[0].physical); - if (ret != *len) - ret = -EIO; - else - ret = 0; -err: - kfree(multi); - return ret; + return error ? -EINVAL : 0; } static int check_extent_csums(struct btrfs_root *root, u64 bytenr, @@ -5421,27 +5829,41 @@ static int is_dropped_key(struct btrfs_key *key, return 0; } +/* + * Here are the rules for FULL_BACKREF. + * + * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set. + * 2) If btrfs_header_owner(buf) no longer points to buf then we have + * FULL_BACKREF set. + * 3) We cow'ed the block walking down a reloc tree. This is impossible to tell + * if it happened after the relocation occurred since we'll have dropped the + * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and + * have no real way to know for sure. + * + * We process the blocks one root at a time, and we start from the lowest root + * objectid and go to the highest. So we can just lookup the owner backref for + * the record and if we don't find it then we know it doesn't exist and we have + * a FULL BACKREF. + * + * FIXME: if we ever start reclaiming root objectid's then we need to fix this + * assumption and simply indicate that we _think_ that the FULL BACKREF needs to + * be set or not and then we can check later once we've gathered all the refs. + */ static int calc_extent_flag(struct btrfs_root *root, struct cache_tree *extent_cache, struct extent_buffer *buf, struct root_item_record *ri, u64 *flags) { - int i; - int nritems = btrfs_header_nritems(buf); - struct btrfs_key key; struct extent_record *rec; struct cache_extent *cache; - struct data_backref *dback; struct tree_backref *tback; - struct extent_buffer *new_buf; u64 owner = 0; - u64 bytenr; - u64 offset; - u64 ptr; - int size; - int ret; - u8 level; + + cache = lookup_cache_extent(extent_cache, buf->start, 1); + /* we have added this extent before */ + BUG_ON(!cache); + rec = container_of(cache, struct extent_record, cache); /* * Except file/reloc tree, we can not have @@ -5454,96 +5876,32 @@ static int calc_extent_flag(struct btrfs_root *root, */ if (buf->start == ri->bytenr) goto normal; - if (btrfs_is_leaf(buf)) { - /* - * we are searching from original root, world - * peace is achieved, we use normal backref. - */ - owner = btrfs_header_owner(buf); - if (owner == ri->objectid) - goto normal; - /* - * we check every eb here, and if any of - * eb dosen't have original root refers - * to this eb, we set full backref flag for - * this extent, otherwise normal backref. - */ - for (i = 0; i < nritems; i++) { - struct btrfs_file_extent_item *fi; - btrfs_item_key_to_cpu(buf, &key, i); - if (key.type != BTRFS_EXTENT_DATA_KEY) - continue; - fi = btrfs_item_ptr(buf, i, - struct btrfs_file_extent_item); - if (btrfs_file_extent_type(buf, fi) == - BTRFS_FILE_EXTENT_INLINE) - continue; - if (btrfs_file_extent_disk_bytenr(buf, fi) == 0) - continue; - bytenr = btrfs_file_extent_disk_bytenr(buf, fi); - cache = lookup_cache_extent(extent_cache, bytenr, 1); - if (!cache) - goto full_backref; - offset = btrfs_file_extent_offset(buf, fi); - rec = container_of(cache, struct extent_record, cache); - dback = find_data_backref(rec, 0, ri->objectid, owner, - key.offset - offset, 1, bytenr, bytenr); - if (!dback) - goto full_backref; - } + if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) goto full_backref; - } else { - level = btrfs_header_level(buf); - for (i = 0; i < nritems; i++) { - ptr = btrfs_node_blockptr(buf, i); - size = btrfs_level_size(root, level); - if (i == 0) { - new_buf = read_tree_block(root, ptr, size, 0); - if (!extent_buffer_uptodate(new_buf)) { - free_extent_buffer(new_buf); - ret = -EIO; - return ret; - } - /* - * we are searching from origin root, world - * peace is achieved, we use normal backref. - */ - owner = btrfs_header_owner(new_buf); - free_extent_buffer(new_buf); - if (owner == ri->objectid) - goto normal; - } - cache = lookup_cache_extent(extent_cache, ptr, size); - if (!cache) - goto full_backref; - rec = container_of(cache, struct extent_record, cache); - tback = find_tree_backref(rec, 0, owner); - if (!tback) - goto full_backref; - } - } + owner = btrfs_header_owner(buf); + if (owner == ri->objectid) + goto normal; + + tback = find_tree_backref(rec, 0, owner); + if (!tback) + goto full_backref; normal: *flags = 0; - cache = lookup_cache_extent(extent_cache, buf->start, 1); - /* we have added this extent before */ - BUG_ON(!cache); - rec = container_of(cache, struct extent_record, cache); - rec->flag_block_full_backref = 0; + if (rec->flag_block_full_backref != -1 && + rec->flag_block_full_backref != 0) + rec->bad_full_backref = 1; return 0; full_backref: *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; - cache = lookup_cache_extent(extent_cache, buf->start, 1); - /* we have added this extent before */ - BUG_ON(!cache); - rec = container_of(cache, struct extent_record, cache); - rec->flag_block_full_backref = 1; + if (rec->flag_block_full_backref != -1 && + rec->flag_block_full_backref != 1) + rec->bad_full_backref = 1; return 0; } -static int run_next_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int run_next_block(struct btrfs_root *root, struct block_info *bits, int bits_nr, u64 *last, @@ -5559,6 +5917,7 @@ static int run_next_block(struct btrfs_trans_handle *trans, struct root_item_record *ri) { struct extent_buffer *buf; + struct extent_record *rec = NULL; u64 bytenr; u32 size; u64 parent; @@ -5611,8 +5970,6 @@ static int run_next_block(struct btrfs_trans_handle *trans, } cache = lookup_cache_extent(extent_cache, bytenr, size); if (cache) { - struct extent_record *rec; - rec = container_of(cache, struct extent_record, cache); gen = rec->parent_generation; } @@ -5627,32 +5984,64 @@ static int run_next_block(struct btrfs_trans_handle *trans, nritems = btrfs_header_nritems(buf); - /* - * FIXME, this only works only if we don't have any full - * backref mode. - */ + flags = 0; if (!init_extent_tree) { ret = btrfs_lookup_extent_info(NULL, root, bytenr, btrfs_header_level(buf), 1, NULL, &flags); - if (ret < 0) - goto out; + if (ret < 0) { + ret = calc_extent_flag(root, extent_cache, buf, ri, &flags); + if (ret < 0) { + fprintf(stderr, "Couldn't calc extent flags\n"); + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + } + } } else { flags = 0; ret = calc_extent_flag(root, extent_cache, buf, ri, &flags); - if (ret < 0) - goto out; + if (ret < 0) { + fprintf(stderr, "Couldn't calc extent flags\n"); + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + } + } + + if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { + if (ri != NULL && + ri->objectid != BTRFS_TREE_RELOC_OBJECTID && + ri->objectid == btrfs_header_owner(buf)) { + /* + * Ok we got to this block from it's original owner and + * we have FULL_BACKREF set. Relocation can leave + * converted blocks over so this is altogether possible, + * however it's not possible if the generation > the + * last snapshot, so check for this case. + */ + if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) && + btrfs_header_generation(buf) > ri->last_snapshot) { + flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF; + rec->bad_full_backref = 1; + } + } + } else { + if (ri != NULL && + (ri->objectid == BTRFS_TREE_RELOC_OBJECTID || + btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) { + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + rec->bad_full_backref = 1; + } } if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { + rec->flag_block_full_backref = 1; parent = bytenr; owner = 0; } else { + rec->flag_block_full_backref = 0; parent = 0; owner = btrfs_header_owner(buf); } - ret = check_block(trans, root, extent_cache, buf, flags); + ret = check_block(root, extent_cache, buf, flags); if (ret) goto out; @@ -6133,10 +6522,8 @@ static int record_extent(struct btrfs_trans_handle *trans, parent, tback->root, 0, 0); fprintf(stderr, "adding new tree backref on " "start %llu len %llu parent %llu root %llu\n", - rec->start, rec->max_size, tback->parent, tback->root); + rec->start, rec->max_size, parent, tback->root); } - if (ret) - goto fail; fail: btrfs_release_path(path); return ret; @@ -6204,16 +6591,16 @@ static struct extent_entry *find_most_right_entry(struct list_head *entries) return best; } -static int repair_ref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info, struct btrfs_path *path, +static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path, struct data_backref *dback, struct extent_entry *entry) { + struct btrfs_trans_handle *trans; struct btrfs_root *root; struct btrfs_file_extent_item *fi; struct extent_buffer *leaf; struct btrfs_key key; u64 bytenr, bytes; - int ret; + int ret, err; key.objectid = dback->root; key.type = BTRFS_ROOT_ITEM_KEY; @@ -6265,11 +6652,9 @@ static int repair_ref(struct btrfs_trans_handle *trans, btrfs_release_path(path); - /* - * Have to make sure that this root gets updated when we commit the - * transaction - */ - record_root_in_trans(trans, root); + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); /* * Ok we have the key of the file extent we want to fix, now we can cow @@ -6279,13 +6664,14 @@ static int repair_ref(struct btrfs_trans_handle *trans, if (ret < 0) { fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n", key.objectid, key.type, key.offset, ret); - return ret; + goto out; } if (ret > 0) { fprintf(stderr, "Well that's odd, we just found this key " "[%Lu, %u, %Lu]\n", key.objectid, key.type, key.offset); - return -EINVAL; + ret = -EINVAL; + goto out; } leaf = path->nodes[0]; fi = btrfs_item_ptr(leaf, path->slots[0], @@ -6298,7 +6684,8 @@ static int repair_ref(struct btrfs_trans_handle *trans, "system and send it to a btrfs developer so they can " "complete this functionality for bytenr %Lu\n", dback->disk_bytenr); - return -EINVAL; + ret = -EINVAL; + goto out; } if (dback->node.broken && dback->disk_bytenr != entry->bytenr) { @@ -6315,7 +6702,8 @@ static int repair_ref(struct btrfs_trans_handle *trans, "take a btrfs-image of this file system and " "send it to a btrfs developer, ref %Lu\n", dback->disk_bytenr); - return -EINVAL; + ret = -EINVAL; + goto out; } offset += off_diff; btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr); @@ -6329,7 +6717,8 @@ static int repair_ref(struct btrfs_trans_handle *trans, " take a btrfs-image of this file system and " "send it to a btrfs developer, ref %Lu\n", dback->disk_bytenr); - return -EINVAL; + ret = -EINVAL; + goto out; } offset += dback->disk_bytenr; @@ -6350,12 +6739,13 @@ static int repair_ref(struct btrfs_trans_handle *trans, else printf("ram bytes may be wrong?\n"); btrfs_mark_buffer_dirty(leaf); +out: + err = btrfs_commit_transaction(trans, root); btrfs_release_path(path); - return 0; + return ret ? ret : err; } -static int verify_backrefs(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info, struct btrfs_path *path, +static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path, struct extent_record *rec) { struct extent_backref *back; @@ -6517,7 +6907,7 @@ static int verify_backrefs(struct btrfs_trans_handle *trans, dback->disk_bytenr == best->bytenr) continue; - ret = repair_ref(trans, info, path, dback, best); + ret = repair_ref(info, path, dback, best); if (ret) goto out; } @@ -6618,15 +7008,15 @@ static int process_duplicates(struct btrfs_root *root, return good->num_duplicates ? 0 : 1; } -static int delete_duplicate_records(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int delete_duplicate_records(struct btrfs_root *root, struct extent_record *rec) { + struct btrfs_trans_handle *trans; LIST_HEAD(delete_list); struct btrfs_path *path; struct extent_record *tmp, *good, *n; int nr_del = 0; - int ret = 0; + int ret = 0, err; struct btrfs_key key; path = btrfs_alloc_path(); @@ -6664,6 +7054,12 @@ static int delete_duplicate_records(struct btrfs_trans_handle *trans, } root = root->fs_info->extent_root; + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + list_for_each_entry(tmp, &delete_list, list) { if (tmp->found_rec == 0) continue; @@ -6683,15 +7079,17 @@ static int delete_duplicate_records(struct btrfs_trans_handle *trans, if (ret) { if (ret > 0) ret = -EINVAL; - goto out; + break; } ret = btrfs_del_item(trans, root, path); if (ret) - goto out; + break; btrfs_release_path(path); nr_del++; } - + err = btrfs_commit_transaction(trans, root); + if (err && !ret) + ret = err; out: while (!list_empty(&delete_list)) { tmp = list_entry(delete_list.next, struct extent_record, list); @@ -6715,8 +7113,7 @@ out: return ret ? ret : nr_del; } -static int find_possible_backrefs(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info, +static int find_possible_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path, struct cache_tree *extent_cache, struct extent_record *rec) @@ -6890,11 +7287,11 @@ out: * all of the existing entries for it and recreate them * based on what the tree scan found. */ -static int fixup_extent_refs(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info, +static int fixup_extent_refs(struct btrfs_fs_info *info, struct cache_tree *extent_cache, struct extent_record *rec) { + struct btrfs_trans_handle *trans = NULL; int ret; struct btrfs_path *path; struct list_head *cur = rec->backrefs.next; @@ -6903,21 +7300,8 @@ static int fixup_extent_refs(struct btrfs_trans_handle *trans, int allocated = 0; u64 flags = 0; - /* - * remember our flags for recreating the extent. - * FIXME, if we have cleared extent tree, we can not - * lookup extent info in extent tree. - */ - if (!init_extent_tree) { - ret = btrfs_lookup_extent_info(NULL, info->extent_root, - rec->start, rec->max_size, - rec->metadata, NULL, &flags); - if (ret < 0) - return ret; - } else { - if (rec->flag_block_full_backref) - flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; - } + if (rec->flag_block_full_backref) + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; path = btrfs_alloc_path(); if (!path) @@ -6931,17 +7315,22 @@ static int fixup_extent_refs(struct btrfs_trans_handle *trans, * them into the list if we find the backref so that * verify_backrefs can figure out what to do. */ - ret = find_possible_backrefs(trans, info, path, extent_cache, - rec); + ret = find_possible_backrefs(info, path, extent_cache, rec); if (ret < 0) goto out; } /* step one, make sure all of the backrefs agree */ - ret = verify_backrefs(trans, info, path, rec); + ret = verify_backrefs(info, path, rec); if (ret < 0) goto out; + trans = btrfs_start_transaction(info->extent_root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + /* step two, delete all the existing records */ ret = delete_extent_records(trans, info->extent_root, path, rec->start, rec->max_size); @@ -6969,6 +7358,7 @@ static int fixup_extent_refs(struct btrfs_trans_handle *trans, if (!back->found_ref) continue; + rec->bad_full_backref = 0; ret = record_extent(trans, info, path, rec, back, allocated, flags); allocated = 1; @@ -6976,10 +7366,77 @@ static int fixup_extent_refs(struct btrfs_trans_handle *trans, goto out; } out: + if (trans) { + int err = btrfs_commit_transaction(trans, info->extent_root); + if (!ret) + ret = err; + } + btrfs_free_path(path); return ret; } +static int fixup_extent_flags(struct btrfs_fs_info *fs_info, + struct extent_record *rec) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = fs_info->extent_root; + struct btrfs_path *path; + struct btrfs_extent_item *ei; + struct btrfs_key key; + u64 flags; + int ret = 0; + + key.objectid = rec->start; + if (rec->metadata) { + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = rec->info_level; + } else { + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = rec->max_size; + } + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + btrfs_free_path(path); + return PTR_ERR(trans); + } + + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret < 0) { + btrfs_free_path(path); + btrfs_commit_transaction(trans, root); + return ret; + } else if (ret) { + fprintf(stderr, "Didn't find extent for %llu\n", + (unsigned long long)rec->start); + btrfs_free_path(path); + btrfs_commit_transaction(trans, root); + return -ENOENT; + } + + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_extent_item); + flags = btrfs_extent_flags(path->nodes[0], ei); + if (rec->flag_block_full_backref) { + fprintf(stderr, "setting full backref on %llu\n", + (unsigned long long)key.objectid); + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + } else { + fprintf(stderr, "clearing full backref on %llu\n", + (unsigned long long)key.objectid); + flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF; + } + btrfs_set_extent_flags(path->nodes[0], ei, flags); + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_free_path(path); + return btrfs_commit_transaction(trans, root); +} + /* right now we only prune from the extent allocation tree */ static int prune_one_block(struct btrfs_trans_handle *trans, struct btrfs_fs_info *info, @@ -7049,20 +7506,27 @@ out: return ret; } -static int prune_corrupt_blocks(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info) +static int prune_corrupt_blocks(struct btrfs_fs_info *info) { + struct btrfs_trans_handle *trans = NULL; struct cache_extent *cache; struct btrfs_corrupt_block *corrupt; - cache = search_cache_extent(info->corrupt_blocks, 0); while (1) { + cache = search_cache_extent(info->corrupt_blocks, 0); if (!cache) break; + if (!trans) { + trans = btrfs_start_transaction(info->extent_root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); + } corrupt = container_of(cache, struct btrfs_corrupt_block, cache); prune_one_block(trans, info, corrupt); - cache = next_cache_extent(cache); + remove_cache_extent(info->corrupt_blocks, cache); } + if (trans) + return btrfs_commit_transaction(trans, info->extent_root); return 0; } @@ -7092,8 +7556,7 @@ static void reset_cached_block_groups(struct btrfs_fs_info *fs_info) } } -static int check_extent_refs(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int check_extent_refs(struct btrfs_root *root, struct cache_tree *extent_cache) { struct extent_record *rec; @@ -7114,22 +7577,28 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, cache = search_cache_extent(extent_cache, 0); while(cache) { rec = container_of(cache, struct extent_record, cache); - btrfs_pin_extent(root->fs_info, - rec->start, rec->max_size); + set_extent_dirty(root->fs_info->excluded_extents, + rec->start, + rec->start + rec->max_size - 1, + GFP_NOFS); cache = next_cache_extent(cache); } /* pin down all the corrupted blocks too */ cache = search_cache_extent(root->fs_info->corrupt_blocks, 0); while(cache) { - btrfs_pin_extent(root->fs_info, - cache->start, cache->size); + set_extent_dirty(root->fs_info->excluded_extents, + cache->start, + cache->start + cache->size - 1, + GFP_NOFS); cache = next_cache_extent(cache); } - prune_corrupt_blocks(trans, root->fs_info); + prune_corrupt_blocks(root->fs_info); reset_cached_block_groups(root->fs_info); } + reset_cached_block_groups(root->fs_info); + /* * We need to delete any duplicate entries we find first otherwise we * could mess up the extent tree when we have backrefs that actually @@ -7149,7 +7618,7 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, */ if (process_duplicates(root, extent_cache, rec)) continue; - ret = delete_duplicate_records(trans, root, rec); + ret = delete_duplicate_records(root, rec); if (ret < 0) return ret; /* @@ -7165,6 +7634,8 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, return -EAGAIN; while(1) { + int cur_err = 0; + fixed = 0; recorded = 0; cache = search_cache_extent(extent_cache, 0); @@ -7175,6 +7646,7 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, fprintf(stderr, "extent item %llu has multiple extent " "items\n", (unsigned long long)rec->start); err = 1; + cur_err = 1; } if (rec->refs != rec->extent_item_refs) { @@ -7195,7 +7667,7 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, * extent, let the fallback method handle it. */ if (!fixed && repair) { - ret = fixup_extent_refs(trans, + ret = fixup_extent_refs( root->fs_info, extent_cache, rec); if (ret) @@ -7204,7 +7676,7 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, } } err = 1; - + cur_err = 1; } if (all_backpointers_checked(rec, 1)) { fprintf(stderr, "backpointer mismatch on [%llu %llu]\n", @@ -7212,12 +7684,13 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, (unsigned long long)rec->nr); if (!fixed && !recorded && repair) { - ret = fixup_extent_refs(trans, root->fs_info, + ret = fixup_extent_refs(root->fs_info, extent_cache, rec); if (ret) goto repair_abort; fixed = 1; } + cur_err = 1; err = 1; } if (!rec->owner_ref_checked) { @@ -7225,17 +7698,55 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, (unsigned long long)rec->start, (unsigned long long)rec->nr); if (!fixed && !recorded && repair) { - ret = fixup_extent_refs(trans, root->fs_info, + ret = fixup_extent_refs(root->fs_info, extent_cache, rec); if (ret) goto repair_abort; fixed = 1; } err = 1; + cur_err = 1; + } + if (rec->bad_full_backref) { + fprintf(stderr, "bad full backref, on [%llu]\n", + (unsigned long long)rec->start); + if (repair) { + ret = fixup_extent_flags(root->fs_info, rec); + if (ret) + goto repair_abort; + fixed = 1; + } + err = 1; + cur_err = 1; + } + /* + * Although it's not a extent ref's problem, we reuse this + * routine for error reporting. + * No repair function yet. + */ + if (rec->crossing_stripes) { + fprintf(stderr, + "bad metadata [%llu, %llu) crossing stripe boundary\n", + rec->start, rec->start + rec->max_size); + err = 1; + cur_err = 1; + } + + if (rec->wrong_chunk_type) { + fprintf(stderr, + "bad extent [%llu, %llu), type mismatch with chunk\n", + rec->start, rec->start + rec->max_size); + err = 1; + cur_err = 1; } remove_cache_extent(extent_cache, cache); free_all_extent_backrefs(rec); + if (!init_extent_tree && repair && (!cur_err || fixed)) + clear_extent_dirty(root->fs_info->excluded_extents, + rec->start, + rec->start + rec->max_size - 1, + GFP_NOFS); free(rec); } repair_abort: @@ -7244,7 +7755,19 @@ repair_abort: fprintf(stderr, "failed to repair damaged filesystem, aborting\n"); exit(1); } else if (!ret) { + struct btrfs_trans_handle *trans; + + root = root->fs_info->extent_root; + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto repair_abort; + } + btrfs_fix_block_accounting(trans, root); + ret = btrfs_commit_transaction(trans, root); + if (ret) + goto repair_abort; } if (err) fprintf(stderr, "repaired damaged extent references\n"); @@ -7294,6 +7817,7 @@ static int check_chunk_refs(struct chunk_record *chunk_rec, u64 devid; u64 offset; u64 length; + int metadump_v2 = 0; int i; int ret = 0; @@ -7306,7 +7830,8 @@ static int check_chunk_refs(struct chunk_record *chunk_rec, cache); if (chunk_rec->length != block_group_rec->offset || chunk_rec->offset != block_group_rec->objectid || - chunk_rec->type_flags != block_group_rec->flags) { + (!metadump_v2 && + chunk_rec->type_flags != block_group_rec->flags)) { if (!silent) fprintf(stderr, "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n", @@ -7340,6 +7865,9 @@ static int check_chunk_refs(struct chunk_record *chunk_rec, ret = 1; } + if (metadump_v2) + return ret; + length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length, chunk_rec->num_stripes); for (i = 0; i < chunk_rec->num_stripes; ++i) { @@ -7406,7 +7934,7 @@ int check_chunks(struct cache_tree *chunk_cache, cache); err = check_chunk_refs(chunk_rec, block_group_cache, dev_extent_cache, silent); - if (err) + if (err < 0) ret = err; if (err == 0 && good) list_add_tail(&chunk_rec->list, good); @@ -7505,7 +8033,7 @@ static int check_devices(struct rb_root *dev_cache, } static int add_root_item_to_list(struct list_head *head, - u64 objectid, u64 bytenr, + u64 objectid, u64 bytenr, u64 last_snapshot, u8 level, u8 drop_level, int level_size, struct btrfs_key *drop_key) { @@ -7519,6 +8047,7 @@ static int add_root_item_to_list(struct list_head *head, ri_rec->level = level; ri_rec->level_size = level_size; ri_rec->drop_level = drop_level; + ri_rec->last_snapshot = last_snapshot; if (drop_key) memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key)); list_add_tail(&ri_rec->list, head); @@ -7526,8 +8055,19 @@ static int add_root_item_to_list(struct list_head *head, return 0; } +static void free_root_item_list(struct list_head *list) +{ + struct root_item_record *ri_rec; + + while (!list_empty(list)) { + ri_rec = list_first_entry(list, struct root_item_record, + list); + list_del_init(&ri_rec->list); + free(ri_rec); + } +} + static int deal_root_from_list(struct list_head *list, - struct btrfs_trans_handle *trans, struct btrfs_root *root, struct block_info *bits, int bits_nr, @@ -7564,29 +8104,25 @@ static int deal_root_from_list(struct list_head *list, * one by one, otherwise we deal with node firstly which * can maximize readahead. */ - if (!init_extent_tree && !rec->drop_level) - goto skip; while (1) { - ret = run_next_block(trans, root, bits, bits_nr, &last, - pending, seen, reada, - nodes, extent_cache, - chunk_cache, dev_cache, - block_group_cache, + ret = run_next_block(root, bits, bits_nr, &last, + pending, seen, reada, nodes, + extent_cache, chunk_cache, + dev_cache, block_group_cache, dev_extent_cache, rec); if (ret != 0) break; } -skip: free_extent_buffer(buf); list_del(&rec->list); free(rec); + if (ret < 0) + break; } while (ret >= 0) { - ret = run_next_block(trans, root, bits, bits_nr, &last, - pending, seen, reada, - nodes, extent_cache, - chunk_cache, dev_cache, - block_group_cache, + ret = run_next_block(root, bits, bits_nr, &last, pending, seen, + reada, nodes, extent_cache, chunk_cache, + dev_cache, block_group_cache, dev_extent_cache, NULL); if (ret != 0) { if (ret > 0) @@ -7608,6 +8144,7 @@ static int check_chunks_and_extents(struct btrfs_root *root) struct cache_tree pending; struct cache_tree reada; struct cache_tree nodes; + struct extent_io_tree excluded_extents; struct cache_tree corrupt_blocks; struct btrfs_path path; struct btrfs_key key; @@ -7616,7 +8153,6 @@ static int check_chunks_and_extents(struct btrfs_root *root) struct block_info *bits; int bits_nr; struct extent_buffer *leaf; - struct btrfs_trans_handle *trans = NULL; int slot; struct btrfs_root_item ri; struct list_head dropping_trees; @@ -7637,15 +8173,12 @@ static int check_chunks_and_extents(struct btrfs_root *root) cache_tree_init(&nodes); cache_tree_init(&reada); cache_tree_init(&corrupt_blocks); + extent_io_tree_init(&excluded_extents); INIT_LIST_HEAD(&dropping_trees); INIT_LIST_HEAD(&normal_trees); if (repair) { - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - fprintf(stderr, "Error starting transaction\n"); - return PTR_ERR(trans); - } + root->fs_info->excluded_extents = &excluded_extents; root->fs_info->fsck_extent_cache = &extent_cache; root->fs_info->free_extent_hook = free_extent_hook; root->fs_info->corrupt_blocks = &corrupt_blocks; @@ -7658,18 +8191,23 @@ static int check_chunks_and_extents(struct btrfs_root *root) exit(1); } + if (ctx.progress_enabled) { + ctx.tp = TASK_EXTENTS; + task_start(ctx.info); + } + again: root1 = root->fs_info->tree_root; level = btrfs_header_level(root1->node); ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid, - root1->node->start, level, 0, + root1->node->start, 0, level, 0, btrfs_level_size(root1, level), NULL); if (ret < 0) goto out; root1 = root->fs_info->chunk_root; level = btrfs_header_level(root1->node); ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid, - root1->node->start, level, 0, + root1->node->start, 0, level, 0, btrfs_level_size(root1, level), NULL); if (ret < 0) goto out; @@ -7694,15 +8232,18 @@ again: btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]); if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) { unsigned long offset; + u64 last_snapshot; offset = btrfs_item_ptr_offset(leaf, path.slots[0]); read_extent_buffer(leaf, &ri, offset, sizeof(ri)); + last_snapshot = btrfs_root_last_snapshot(&ri); if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) { level = btrfs_root_level(&ri); level_size = btrfs_level_size(root, level); ret = add_root_item_to_list(&normal_trees, found_key.objectid, - btrfs_root_bytenr(&ri), level, + btrfs_root_bytenr(&ri), + last_snapshot, level, 0, level_size, NULL); if (ret < 0) goto out; @@ -7715,7 +8256,8 @@ again: ret = add_root_item_to_list(&dropping_trees, objectid, btrfs_root_bytenr(&ri), - level, ri.drop_level, + last_snapshot, level, + ri.drop_level, level_size, &found_key); if (ret < 0) goto out; @@ -7724,66 +8266,59 @@ again: path.slots[0]++; } btrfs_release_path(&path); - ret = deal_root_from_list(&normal_trees, trans, root, - bits, bits_nr, &pending, &seen, - &reada, &nodes, &extent_cache, + + /* + * check_block can return -EAGAIN if it fixes something, please keep + * this in mind when dealing with return values from these functions, if + * we get -EAGAIN we want to fall through and restart the loop. + */ + ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending, + &seen, &reada, &nodes, &extent_cache, &chunk_cache, &dev_cache, &block_group_cache, &dev_extent_cache); - if (ret < 0) + if (ret < 0) { + if (ret == -EAGAIN) + goto loop; goto out; - ret = deal_root_from_list(&dropping_trees, trans, root, - bits, bits_nr, &pending, &seen, - &reada, &nodes, &extent_cache, - &chunk_cache, &dev_cache, &block_group_cache, - &dev_extent_cache); - if (ret < 0) + } + ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr, + &pending, &seen, &reada, &nodes, + &extent_cache, &chunk_cache, &dev_cache, + &block_group_cache, &dev_extent_cache); + if (ret < 0) { + if (ret == -EAGAIN) + goto loop; goto out; - if (ret >= 0) - ret = check_extent_refs(trans, root, &extent_cache); - if (ret == -EAGAIN) { - ret = btrfs_commit_transaction(trans, root); - if (ret) - goto out; - - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto out; - } - - free_corrupt_blocks_tree(root->fs_info->corrupt_blocks); - free_extent_cache_tree(&seen); - free_extent_cache_tree(&pending); - free_extent_cache_tree(&reada); - free_extent_cache_tree(&nodes); - free_chunk_cache_tree(&chunk_cache); - free_block_group_tree(&block_group_cache); - free_device_cache_tree(&dev_cache); - free_device_extent_tree(&dev_extent_cache); - free_extent_record_cache(root->fs_info, &extent_cache); - goto again; } - err = check_chunks(&chunk_cache, &block_group_cache, + ret = check_chunks(&chunk_cache, &block_group_cache, &dev_extent_cache, NULL, NULL, NULL, 0); - if (err && !ret) - ret = err; + if (ret) { + if (ret == -EAGAIN) + goto loop; + err = ret; + } - err = check_devices(&dev_cache, &dev_extent_cache); - if (err && !ret) + ret = check_extent_refs(root, &extent_cache); + if (ret < 0) { + if (ret == -EAGAIN) + goto loop; + goto out; + } + + ret = check_devices(&dev_cache, &dev_extent_cache); + if (ret && err) ret = err; out: - if (trans) { - err = btrfs_commit_transaction(trans, root); - if (!ret) - ret = err; - } + task_stop(ctx.info); if (repair) { free_corrupt_blocks_tree(root->fs_info->corrupt_blocks); + extent_io_tree_cleanup(&excluded_extents); root->fs_info->fsck_extent_cache = NULL; root->fs_info->free_extent_hook = NULL; root->fs_info->corrupt_blocks = NULL; + root->fs_info->excluded_extents = NULL; } free(bits); free_chunk_cache_tree(&chunk_cache); @@ -7795,6 +8330,21 @@ out: free_extent_cache_tree(&reada); free_extent_cache_tree(&nodes); return ret; +loop: + free_corrupt_blocks_tree(root->fs_info->corrupt_blocks); + free_extent_cache_tree(&seen); + free_extent_cache_tree(&pending); + free_extent_cache_tree(&reada); + free_extent_cache_tree(&nodes); + free_chunk_cache_tree(&chunk_cache); + free_block_group_tree(&block_group_cache); + free_device_cache_tree(&dev_cache); + free_device_extent_tree(&dev_extent_cache); + free_extent_record_cache(root->fs_info, &extent_cache); + free_root_item_list(&normal_trees); + free_root_item_list(&dropping_trees); + extent_io_tree_cleanup(&excluded_extents); + goto again; } static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans, @@ -7911,7 +8461,7 @@ static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info, */ tmp = read_tree_block(fs_info->extent_root, bytenr, leafsize, 0); - if (!tmp) { + if (!extent_buffer_uptodate(tmp)) { fprintf(stderr, "Error reading root block\n"); return -EIO; } @@ -7930,7 +8480,7 @@ static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info, tmp = read_tree_block(fs_info->extent_root, bytenr, leafsize, 0); - if (!tmp) { + if (!extent_buffer_uptodate(tmp)) { fprintf(stderr, "Error reading tree block\n"); return -EIO; } @@ -8347,8 +8897,142 @@ static int populate_csum(struct btrfs_trans_handle *trans, return ret; } -static int fill_csum_tree(struct btrfs_trans_handle *trans, - struct btrfs_root *csum_root) +static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans, + struct btrfs_root *csum_root, + struct btrfs_root *cur_root) +{ + struct btrfs_path *path; + struct btrfs_key key; + struct extent_buffer *node; + struct btrfs_file_extent_item *fi; + char *buf = NULL; + u64 start = 0; + u64 len = 0; + int slot = 0; + int ret = 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + buf = malloc(cur_root->fs_info->csum_root->sectorsize); + if (!buf) { + ret = -ENOMEM; + goto out; + } + + key.objectid = 0; + key.offset = 0; + key.type = 0; + + ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0); + if (ret < 0) + goto out; + /* Iterate all regular file extents and fill its csum */ + while (1) { + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + + if (key.type != BTRFS_EXTENT_DATA_KEY) + goto next; + node = path->nodes[0]; + slot = path->slots[0]; + fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item); + if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG) + goto next; + start = btrfs_file_extent_disk_bytenr(node, fi); + len = btrfs_file_extent_disk_num_bytes(node, fi); + + ret = populate_csum(trans, csum_root, buf, start, len); + if (ret == -EEXIST) + ret = 0; + if (ret < 0) + goto out; +next: + /* + * TODO: if next leaf is corrupted, jump to nearest next valid + * leaf. + */ + ret = btrfs_next_item(cur_root, path); + if (ret < 0) + goto out; + if (ret > 0) { + ret = 0; + goto out; + } + } + +out: + btrfs_free_path(path); + free(buf); + return ret; +} + +static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans, + struct btrfs_root *csum_root) +{ + struct btrfs_fs_info *fs_info = csum_root->fs_info; + struct btrfs_path *path; + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *cur_root; + struct extent_buffer *node; + struct btrfs_key key; + int slot = 0; + int ret = 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = BTRFS_FS_TREE_OBJECTID; + key.offset = 0; + key.type = BTRFS_ROOT_ITEM_KEY; + + ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); + if (ret < 0) + goto out; + if (ret > 0) { + ret = -ENOENT; + goto out; + } + + while (1) { + node = path->nodes[0]; + slot = path->slots[0]; + btrfs_item_key_to_cpu(node, &key, slot); + if (key.objectid > BTRFS_LAST_FREE_OBJECTID) + goto out; + if (key.type != BTRFS_ROOT_ITEM_KEY) + goto next; + if (!is_fstree(key.objectid)) + goto next; + key.offset = (u64)-1; + + cur_root = btrfs_read_fs_root(fs_info, &key); + if (IS_ERR(cur_root) || !cur_root) { + fprintf(stderr, "Fail to read fs/subvol tree: %lld\n", + key.objectid); + goto out; + } + ret = fill_csum_tree_from_one_fs_root(trans, csum_root, + cur_root); + if (ret < 0) + goto out; +next: + ret = btrfs_next_item(tree_root, path); + if (ret > 0) { + ret = 0; + goto out; + } + if (ret < 0) + goto out; + } + +out: + btrfs_free_path(path); + return ret; +} + +static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *csum_root) { struct btrfs_root *extent_root = csum_root->fs_info->extent_root; struct btrfs_path *path; @@ -8416,6 +9100,23 @@ static int fill_csum_tree(struct btrfs_trans_handle *trans, return ret; } +/* + * Recalculate the csum and put it into the csum tree. + * + * Extent tree init will wipe out all the extent info, so in that case, we + * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we + * will use fs/subvol trees to init the csum tree. + */ +static int fill_csum_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *csum_root, + int search_fs_tree) +{ + if (search_fs_tree) + return fill_csum_tree_from_fs(trans, csum_root); + else + return fill_csum_tree_from_extent(trans, csum_root); +} + struct root_item_info { /* level of the root */ u8 level; @@ -8517,11 +9218,11 @@ static int build_roots_info_cache(struct btrfs_fs_info *info) iref = (struct btrfs_extent_inline_ref *)(ei + 1); level = found_key.offset; } else { - struct btrfs_tree_block_info *info; + struct btrfs_tree_block_info *binfo; - info = (struct btrfs_tree_block_info *)(ei + 1); - iref = (struct btrfs_extent_inline_ref *)(info + 1); - level = btrfs_tree_block_level(leaf, info); + binfo = (struct btrfs_tree_block_info *)(ei + 1); + iref = (struct btrfs_extent_inline_ref *)(binfo + 1); + level = btrfs_tree_block_level(leaf, binfo); } /* @@ -8729,6 +9430,8 @@ again: if (found_key.type != BTRFS_ROOT_ITEM_KEY) goto next; + if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID) + goto next; ret = maybe_repair_root_item(info, path, &found_key, trans ? 0 : 1); @@ -8749,8 +9452,9 @@ next: ret = 0; out: free_roots_info_cache(); - if (path) - btrfs_free_path(path); + btrfs_free_path(path); + if (trans) + btrfs_commit_transaction(trans, info->tree_root); if (ret < 0) return ret; @@ -8759,17 +9463,25 @@ out: const char * const cmd_check_usage[] = { "btrfs check [options] ", - "Check an unmounted btrfs filesystem.", + "Check structural inegrity of a filesystem (unmounted).", + "Check structural inegrity of an unmounted filesystem. Verify internal", + "trees' consistency and item connectivity. In the repair mode try to", + "fix the problems found.", + "WARNING: the repair mode is considered dangerous", "", "-s|--super use this superblock copy", "-b|--backup use the backup root copy", "--repair try to repair the filesystem", + "--readonly run in read-only mode (default)", "--init-csum-tree create a new CRC tree", "--init-extent-tree create a new extent tree", "--check-data-csum verify checkums of data blocks", - "--qgroup-report print a report on qgroup consistency", - "--subvol-extents print subvolume extents and sharing state", - "--tree-root use the given bytenr for the tree root", + "-Q|--qgroup-report print a report on qgroup consistency", + "-E|--subvol-extents ", + " print subvolume extents and sharing state", + "-r|--tree-root use the given bytenr for the tree root", + "-c|--chunk-root use the given bytenr for the chunk tree root", + "-p|--progress indicate progress", NULL }; @@ -8781,6 +9493,7 @@ int cmd_check(int argc, char **argv) u64 bytenr = 0; u64 subvolid = 0; u64 tree_root_bytenr = 0; + u64 chunk_root_bytenr = 0; char uuidbuf[BTRFS_UUID_UNPARSED_SIZE]; int ret; u64 num; @@ -8791,25 +9504,25 @@ int cmd_check(int argc, char **argv) while(1) { int c; - int option_index = 0; enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT, OPT_CHECK_CSUM, OPT_READONLY }; static const struct option long_options[] = { - { "super", 1, NULL, 's' }, - { "repair", 0, NULL, OPT_REPAIR }, - { "readonly", 0, NULL, OPT_READONLY }, - { "init-csum-tree", 0, NULL, OPT_INIT_CSUM }, - { "init-extent-tree", 0, NULL, OPT_INIT_EXTENT }, - { "check-data-csum", 0, NULL, OPT_CHECK_CSUM }, - { "backup", 0, NULL, 'b' }, - { "subvol-extents", 1, NULL, 'E' }, - { "qgroup-report", 0, NULL, 'Q' }, - { "tree-root", 1, NULL, 'r' }, + { "super", required_argument, NULL, 's' }, + { "repair", no_argument, NULL, OPT_REPAIR }, + { "readonly", no_argument, NULL, OPT_READONLY }, + { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM }, + { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT }, + { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM }, + { "backup", no_argument, NULL, 'b' }, + { "subvol-extents", required_argument, NULL, 'E' }, + { "qgroup-report", no_argument, NULL, 'Q' }, + { "tree-root", required_argument, NULL, 'r' }, + { "chunk-root", required_argument, NULL, 'c' }, + { "progress", no_argument, NULL, 'p' }, { NULL, 0, NULL, 0} }; - c = getopt_long(argc, argv, "as:br:", long_options, - &option_index); + c = getopt_long(argc, argv, "as:br:pc:", long_options, NULL); if (c < 0) break; switch(c) { @@ -8838,6 +9551,12 @@ int cmd_check(int argc, char **argv) case 'r': tree_root_bytenr = arg_strtou64(optarg); break; + case 'c': + chunk_root_bytenr = arg_strtou64(optarg); + break; + case 'p': + ctx.progress_enabled = true; + break; case '?': case 'h': usage(cmd_check_usage); @@ -8866,11 +9585,15 @@ int cmd_check(int argc, char **argv) break; } } - argc = argc - optind; - if (check_argc_exact(argc, 1)) + if (check_argc_exact(argc - optind, 1)) usage(cmd_check_usage); + if (ctx.progress_enabled) { + ctx.tp = TASK_NOTHING; + ctx.info = task_init(print_status_check, print_status_return, &ctx); + } + /* This check is the only reason for --readonly to exist */ if (readonly && repair) { fprintf(stderr, "Repair options are not compatible with --readonly\n"); @@ -8894,13 +9617,14 @@ int cmd_check(int argc, char **argv) ctree_flags |= OPEN_CTREE_PARTIAL; info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr, - ctree_flags); + chunk_root_bytenr, ctree_flags); if (!info) { fprintf(stderr, "Couldn't open file system\n"); ret = -EIO; goto err_out; } + global_info = info; root = info->fs_root; /* @@ -8971,7 +9695,8 @@ int cmd_check(int argc, char **argv) goto close_out; } - ret = fill_csum_tree(trans, info->csum_root); + ret = fill_csum_tree(trans, info->csum_root, + init_extent_tree); if (ret) { fprintf(stderr, "crc refilling failed\n"); return -EIO; @@ -8996,7 +9721,8 @@ int cmd_check(int argc, char **argv) goto close_out; } - fprintf(stderr, "checking extents\n"); + if (!ctx.progress_enabled) + fprintf(stderr, "checking extents\n"); ret = check_chunks_and_extents(root); if (ret) fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n"); @@ -9017,7 +9743,12 @@ int cmd_check(int argc, char **argv) goto close_out; } - fprintf(stderr, "checking free space cache\n"); + if (!ctx.progress_enabled) { + if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) + fprintf(stderr, "checking free space tree\n"); + else + fprintf(stderr, "checking free space cache\n"); + } ret = check_space_cache(root); if (ret) goto out; @@ -9030,7 +9761,8 @@ int cmd_check(int argc, char **argv) */ no_holes = btrfs_fs_incompat(root->fs_info, BTRFS_FEATURE_INCOMPAT_NO_HOLES); - fprintf(stderr, "checking fs roots\n"); + if (!ctx.progress_enabled) + fprintf(stderr, "checking fs roots\n"); ret = check_fs_roots(root, &root_cache); if (ret) goto out; @@ -9105,11 +9837,13 @@ out: printf("file data blocks allocated: %llu\n referenced %llu\n", (unsigned long long)data_bytes_allocated, (unsigned long long)data_bytes_referenced); - printf("%s\n", PACKAGE_STRING); free_root_recs_tree(&root_cache); close_out: close_ctree(root); err_out: + if (ctx.progress_enabled) + task_deinit(ctx.info); + return ret; }