X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=qgroup-verify.c;h=aa575fb6b08b971c0bde7cebbf8bc2c4b9a837f4;hb=52bfe9ef78fdf6c1f1c5ceb3f7a8bf8bf2164775;hp=1a0d38c341176971ddcf720684154944cbcf8465;hpb=6bdf962fe35a8648dc70b3c6724c0a3749d7d591;p=platform%2Fupstream%2Fbtrfs-progs.git diff --git a/qgroup-verify.c b/qgroup-verify.c index 1a0d38c..aa575fb 100644 --- a/qgroup-verify.c +++ b/qgroup-verify.c @@ -29,13 +29,16 @@ #include "utils.h" #include "ulist.h" #include "rbtree-utils.h" +#include "transaction.h" +#include "repair.h" #include "qgroup-verify.h" /*#define QGROUP_VERIFY_DEBUG*/ static unsigned long tot_extents_scanned = 0; -static void add_bytes(u64 root_objectid, u64 num_bytes, int exclusive); +struct qgroup_count; +static struct qgroup_count *find_count(u64 qgroupid); struct qgroup_info { u64 referenced; @@ -54,6 +57,19 @@ struct qgroup_count { struct qgroup_info info; struct rb_node rb_node; + + /* Parents when we are a child group */ + struct list_head groups; + + /* + * Children when we are a parent group (not currently used but + * maintained to mirror kernel handling of qgroups) + */ + struct list_head members; + + u64 cur_refcnt; + + struct list_head bad_list; }; static struct counts_tree { @@ -63,9 +79,44 @@ static struct counts_tree { unsigned int qgroup_inconsist:1; } counts = { .root = RB_ROOT }; +static LIST_HEAD(bad_qgroups); + static struct rb_root by_bytenr = RB_ROOT; /* + * Glue structure to represent the relations between qgroups. Mirrored + * from kernel. + */ +struct btrfs_qgroup_list { + struct list_head next_group; + struct list_head next_member; + struct qgroup_count *group; /* Parent group */ + struct qgroup_count *member; +}; + +/* Allow us to reset ref counts during accounting without zeroing each group. */ +static u64 qgroup_seq = 1ULL; + +static inline void update_cur_refcnt(struct qgroup_count *c) +{ + if (c->cur_refcnt < qgroup_seq) + c->cur_refcnt = qgroup_seq; + c->cur_refcnt++; +} + +static inline u64 group_get_cur_refcnt(struct qgroup_count *c) +{ + if (c->cur_refcnt < qgroup_seq) + return 0; + return c->cur_refcnt - qgroup_seq; +} + +static void inc_qgroup_seq(int root_count) +{ + qgroup_seq += root_count + 1; +} + +/* * List of interior tree blocks. We walk this list after loading the * extent tree to resolve implied refs. For each interior node we'll * place a shared ref in the ref tree against each child object. This @@ -267,10 +318,11 @@ FREE_RB_BASED_TREE(ref, free_ref_node); /* * Resolves all the possible roots for the ref at parent. */ -static void find_parent_roots(struct ulist *roots, u64 parent) +static int find_parent_roots(struct ulist *roots, u64 parent) { struct ref *ref; struct rb_node *node; + int ret; /* * Search the rbtree for the first ref with bytenr == parent. @@ -278,9 +330,18 @@ static void find_parent_roots(struct ulist *roots, u64 parent) * For each unresolved root, we recurse */ ref = find_ref_bytenr(parent); + if (!ref) { + error("bytenr ref not found for parent %llu", + (unsigned long long)parent); + return -EIO; + } node = &ref->bytenr_node; - BUG_ON(ref == NULL); - BUG_ON(ref->bytenr != parent); + if (ref->bytenr != parent) { + error("found bytenr ref does not match parent: %llu != %llu", + (unsigned long long)ref->bytenr, + (unsigned long long)parent); + return -EIO; + } { /* @@ -289,22 +350,152 @@ static void find_parent_roots(struct ulist *roots, u64 parent) */ struct rb_node *prev_node = rb_prev(&ref->bytenr_node); struct ref *prev; + if (prev_node) { prev = rb_entry(prev_node, struct ref, bytenr_node); - BUG_ON(prev->bytenr == parent); + if (prev->bytenr == parent) { + error( + "unexpected: prev bytenr same as parent: %llu", + (unsigned long long)parent); + return -EIO; + } } } do { - if (ref->root) - ulist_add(roots, ref->root, 0, 0); - else - find_parent_roots(roots, ref->parent); + if (ref->root) { + if (is_fstree(ref->root)) { + ret = ulist_add(roots, ref->root, 0, 0); + if (ret < 0) + goto out; + } + } else if (ref->parent == ref->bytenr) { + /* + * Special loop case for tree reloc tree + */ + ref->root = BTRFS_TREE_RELOC_OBJECTID; + } else { + ret = find_parent_roots(roots, ref->parent); + if (ret < 0) + goto out; + } node = rb_next(node); if (node) ref = rb_entry(node, struct ref, bytenr_node); } while (node && ref->bytenr == parent); + + ret = 0; +out: + return ret; +} + +static int account_one_extent(struct ulist *roots, u64 bytenr, u64 num_bytes) +{ + int ret; + u64 id, nr_roots, nr_refs; + struct qgroup_count *count; + struct ulist *counts = ulist_alloc(0); + struct ulist *tmp = ulist_alloc(0); + struct ulist_iterator uiter; + struct ulist_iterator tmp_uiter; + struct ulist_node *unode; + struct ulist_node *tmp_unode; + struct btrfs_qgroup_list *glist; + + if (!counts || !tmp) { + ulist_free(counts); + ulist_free(tmp); + return ENOMEM; + } + + ULIST_ITER_INIT(&uiter); + while ((unode = ulist_next(roots, &uiter))) { + BUG_ON(unode->val == 0ULL); + + /* + * For each root, find their corresponding tracking group and + * add it to our qgroups list. + */ + count = find_count(unode->val); + if (!count) + continue; + + BUG_ON(!is_fstree(unode->val)); + ret = ulist_add(counts, count->qgroupid, ptr_to_u64(count), 0); + if (ret < 0) + goto out; + + /* + * Now we look for parents (and parents of those...). Use a tmp + * ulist here to avoid re-walking (and re-incrementing) our + * already added items on every loop iteration. + */ + ulist_reinit(tmp); + ret = ulist_add(tmp, count->qgroupid, ptr_to_u64(count), 0); + if (ret < 0) + goto out; + + ULIST_ITER_INIT(&tmp_uiter); + while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { + /* Bump the refcount on a node every time we see it. */ + count = u64_to_ptr(tmp_unode->aux); + update_cur_refcnt(count); + + list_for_each_entry(glist, &count->groups, next_group) { + struct qgroup_count *parent; + parent = glist->group; + id = parent->qgroupid; + + BUG_ON(!count); + + ret = ulist_add(counts, id, ptr_to_u64(parent), + 0); + if (ret < 0) + goto out; + ret = ulist_add(tmp, id, ptr_to_u64(parent), + 0); + if (ret < 0) + goto out; + } + } + } + + /* + * Now that we have gathered up and counted all the groups, we + * can add bytes for this ref. + */ + nr_roots = roots->nnodes; + ULIST_ITER_INIT(&uiter); + while ((unode = ulist_next(counts, &uiter))) { + count = u64_to_ptr(unode->aux); + + nr_refs = group_get_cur_refcnt(count); + if (nr_refs) { + count->info.referenced += num_bytes; + count->info.referenced_compressed += num_bytes; + + if (nr_refs == nr_roots) { + count->info.exclusive += num_bytes; + count->info.exclusive_compressed += num_bytes; + } + } +#ifdef QGROUP_VERIFY_DEBUG + printf("account (%llu, %llu), qgroup %llu/%llu, rfer %llu," + " excl %llu, refs %llu, roots %llu\n", bytenr, num_bytes, + btrfs_qgroup_level(count->qgroupid), + btrfs_qgroup_subvid(count->qgroupid), + count->info.referenced, count->info.exclusive, nr_refs, + nr_roots); +#endif + } + + inc_qgroup_seq(roots->nnodes); + ret = 0; +out: + ulist_free(counts); + ulist_free(tmp); + return ret; } static void print_subvol_info(u64 subvolid, u64 bytenr, u64 num_bytes, @@ -318,18 +509,16 @@ static void print_subvol_info(u64 subvolid, u64 bytenr, u64 num_bytes, * - resolve all possible roots for shared refs, insert each * of those into ref_roots ulist (this is a recursive process) * - * - Walk ref_roots ulist, adding extent bytes to each qgroup count that - * cooresponds to a found root. + * - With all roots resolved we can account the ref - this is done in + * account_one_extent(). */ -static void account_all_refs(int do_qgroups, u64 search_subvol) +static int account_all_refs(int do_qgroups, u64 search_subvol) { - int exclusive; struct ref *ref; struct rb_node *node; u64 bytenr, num_bytes; struct ulist *roots = ulist_alloc(0); - struct ulist_iterator uiter; - struct ulist_node *unode; + int ret; node = rb_first(&by_bytenr); while (node) { @@ -347,10 +536,16 @@ static void account_all_refs(int do_qgroups, u64 search_subvol) do { BUG_ON(ref->bytenr != bytenr); BUG_ON(ref->num_bytes != num_bytes); - if (ref->root) - ulist_add(roots, ref->root, 0, 0); - else - find_parent_roots(roots, ref->parent); + if (ref->root) { + if (is_fstree(ref->root)) { + if (ulist_add(roots, ref->root, 0, 0) < 0) + goto enomem; + } + } else { + ret = find_parent_roots(roots, ref->parent); + if (ret < 0) + goto enomem; + } /* * When we leave this inner loop, node is set @@ -362,29 +557,22 @@ static void account_all_refs(int do_qgroups, u64 search_subvol) ref = rb_entry(node, struct ref, bytenr_node); } while (node && ref->bytenr == bytenr); - /* - * Now that we have all roots, we can properly account - * this extent against the corresponding qgroups. - */ - if (roots->nnodes == 1) - exclusive = 1; - else - exclusive = 0; - if (search_subvol) print_subvol_info(search_subvol, bytenr, num_bytes, roots); - ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(roots, &uiter))) { - BUG_ON(unode->val == 0ULL); - /* We only want to account fs trees */ - if (is_fstree(unode->val) && do_qgroups) - add_bytes(unode->val, num_bytes, exclusive); - } + if (!do_qgroups) + continue; + + if (account_one_extent(roots, bytenr, num_bytes)) + goto enomem; } ulist_free(roots); + return 0; +enomem: + error("Out of memory while accounting refs for qgroups"); + return -ENOMEM; } static u64 resolve_one_root(u64 bytenr) @@ -395,6 +583,8 @@ static u64 resolve_one_root(u64 bytenr) if (ref->root) return ref->root; + if (ref->parent == bytenr) + return BTRFS_TREE_RELOC_OBJECTID; return resolve_one_root(ref->parent); } @@ -565,6 +755,9 @@ static int add_refs_for_implied(struct btrfs_fs_info *info, u64 bytenr, struct btrfs_root *root; struct btrfs_key key; + /* Tree reloc tree doesn't contribute qgroup, skip it */ + if (root_id == BTRFS_TREE_RELOC_OBJECTID) + return 0; key.objectid = root_id; key.type = BTRFS_ROOT_ITEM_KEY; key.offset = (u64)-1; @@ -668,6 +861,9 @@ static struct qgroup_count *alloc_count(struct btrfs_disk_key *key, item->exclusive = btrfs_qgroup_info_exclusive(leaf, disk); item->exclusive_compressed = btrfs_qgroup_info_exclusive_compressed(leaf, disk); + INIT_LIST_HEAD(&c->groups); + INIT_LIST_HEAD(&c->members); + INIT_LIST_HEAD(&c->bad_list); if (insert_count(c)) { free(c); @@ -677,42 +873,47 @@ static struct qgroup_count *alloc_count(struct btrfs_disk_key *key, return c; } -static void add_bytes(u64 root_objectid, u64 num_bytes, int exclusive) +static int add_qgroup_relation(u64 memberid, u64 parentid) { - struct qgroup_count *count = find_count(root_objectid); - struct qgroup_info *qg; + struct qgroup_count *member; + struct qgroup_count *parent; + struct btrfs_qgroup_list *list; - BUG_ON(num_bytes < 4096); /* Random sanity check. */ + if (memberid > parentid) + return 0; - if (!count) - return; + member = find_count(memberid); + parent = find_count(parentid); + if (!member || !parent) + return -ENOENT; - qg = &count->info; + list = calloc(1, sizeof(*list)); + if (!list) + return -ENOMEM; - qg->referenced += num_bytes; - /* - * count of compressed bytes is unimplemented, so we do the - * same as kernel. - */ - qg->referenced_compressed += num_bytes; + list->group = parent; + list->member = member; + list_add_tail(&list->next_group, &member->groups); + list_add_tail(&list->next_member, &parent->members); - if (exclusive) { - qg->exclusive += num_bytes; - qg->exclusive_compressed += num_bytes; - } + return 0; } -static void read_qgroup_status(struct btrfs_path *path, +static void read_qgroup_status(struct extent_buffer *eb, int slot, struct counts_tree *counts) { struct btrfs_qgroup_status_item *status_item; u64 flags; - status_item = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_qgroup_status_item); - flags = btrfs_qgroup_status_flags(path->nodes[0], status_item); - counts->qgroup_inconsist = flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; - counts->rescan_running = flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN; + status_item = btrfs_item_ptr(eb, slot, struct btrfs_qgroup_status_item); + flags = btrfs_qgroup_status_flags(eb, status_item); + /* + * Since qgroup_inconsist/rescan_running is just one bit, + * assign value directly won't work. + */ + counts->qgroup_inconsist = !!(flags & + BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT); + counts->rescan_running = !!(flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN); } static int load_quota_info(struct btrfs_fs_info *info) @@ -728,11 +929,18 @@ static int load_quota_info(struct btrfs_fs_info *info) struct btrfs_qgroup_info_item *item; struct qgroup_count *count; int i, nr; + int search_relations = 0; +loop: + /* + * Do 2 passes, the first allocates group counts and reads status + * items. The 2nd pass picks up relation items and glues them to their + * respective count structures. + */ btrfs_init_path(&path); key.offset = 0; - key.objectid = 0; + key.objectid = search_relations ? 0 : BTRFS_QGROUP_RELATION_KEY; key.type = 0; ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); @@ -749,17 +957,26 @@ static int load_quota_info(struct btrfs_fs_info *info) btrfs_item_key(leaf, &disk_key, i); btrfs_disk_key_to_cpu(&key, &disk_key); + if (search_relations) { + if (key.type == BTRFS_QGROUP_RELATION_KEY) { + ret = add_qgroup_relation(key.objectid, + key.offset); + if (ret) { + error("out of memory"); + goto out; + } + } + continue; + } + if (key.type == BTRFS_QGROUP_STATUS_KEY) { - read_qgroup_status(&path, &counts); + read_qgroup_status(leaf, i, &counts); continue; } - if (key.type == BTRFS_QGROUP_RELATION_KEY) - printf("Ignoring qgroup relation key %llu\n", - key.objectid); /* - * Ignore: BTRFS_QGROUP_LIMIT_KEY, - * BTRFS_QGROUP_RELATION_KEY + * At this point, we can ignore anything that + * isn't a qgroup info. */ if (key.type != BTRFS_QGROUP_INFO_KEY) continue; @@ -791,6 +1008,12 @@ static int load_quota_info(struct btrfs_fs_info *info) ret = 0; btrfs_release_path(&path); + + if (!search_relations) { + search_relations = 1; + goto loop; + } + out: return ret; } @@ -1035,6 +1258,11 @@ static void print_fields_signed(long long bytes, prefix, type, bytes, type, bytes_compressed); } +static inline int qgroup_printable(struct qgroup_count *c) +{ + return !!(c->subvol_exists || btrfs_qgroup_level(c->qgroupid)); +} + static int report_qgroup_difference(struct qgroup_count *count, int verbose) { int is_different; @@ -1045,9 +1273,10 @@ static int report_qgroup_difference(struct qgroup_count *count, int verbose) is_different = excl_diff || ref_diff; - if (verbose || (is_different && count->subvol_exists)) { - printf("Counts for qgroup id: %llu %s\n", - (unsigned long long)count->qgroupid, + if (verbose || (is_different && qgroup_printable(count))) { + printf("Counts for qgroup id: %llu/%llu %s\n", + btrfs_qgroup_level(count->qgroupid), + btrfs_qgroup_subvid(count->qgroupid), is_different ? "are different" : ""); print_fields(info->referenced, info->referenced_compressed, @@ -1065,34 +1294,68 @@ static int report_qgroup_difference(struct qgroup_count *count, int verbose) print_fields_signed(excl_diff, excl_diff, "diff:", "exclusive"); } - return (is_different && count->subvol_exists); + + return is_different; } -int report_qgroups(int all) +void report_qgroups(int all) { struct rb_node *node; struct qgroup_count *c; - int ret = 0; - if (counts.rescan_running) { + if (!repair && counts.rescan_running) { if (all) { printf( - "Qgroup rescan is running, qgroup counts difference is expected\n"); + "Qgroup rescan is running, a difference in qgroup counts is expected\n"); } else { printf( - "Qgroup rescan is running, ignore qgroup check\n"); - return ret; + "Qgroup rescan is running, qgroups will not be printed.\n"); + return; } } if (counts.qgroup_inconsist && !counts.rescan_running) - fprintf(stderr, "Qgroup is already inconsistent before checking\n"); + fprintf(stderr, "Qgroup are marked as inconsistent.\n"); node = rb_first(&counts.root); while (node) { c = rb_entry(node, struct qgroup_count, rb_node); - ret |= report_qgroup_difference(c, all); + + if (report_qgroup_difference(c, all)) + list_add_tail(&c->bad_list, &bad_qgroups); + node = rb_next(node); } - return ret; +} + +void free_qgroup_counts(void) +{ + struct rb_node *node; + struct qgroup_count *c; + struct btrfs_qgroup_list *glist, *tmpglist; + + node = rb_first(&counts.root); + while (node) { + c = rb_entry(node, struct qgroup_count, rb_node); + + list_del(&c->bad_list); + + list_for_each_entry_safe(glist, tmpglist, &c->groups, + next_group) { + list_del(&glist->next_group); + list_del(&glist->next_member); + free(glist); + } + list_for_each_entry_safe(glist, tmpglist, &c->members, + next_group) { + list_del(&glist->next_group); + list_del(&glist->next_member); + free(glist); + } + + node = rb_next(node); + + rb_erase(&c->rb_node, &counts.root); + free(c); + } } int qgroup_verify_all(struct btrfs_fs_info *info) @@ -1130,7 +1393,7 @@ int qgroup_verify_all(struct btrfs_fs_info *info) goto out; } - account_all_refs(1, 0); + ret = account_all_refs(1, 0); out: /* @@ -1202,7 +1465,7 @@ int print_extent_state(struct btrfs_fs_info *info, u64 subvol) } printf("Offset\t\tLen\tRoot Refs\tRoots\n"); - account_all_refs(0, subvol); + ret = account_all_refs(0, subvol); out: free_tree_blocks(); @@ -1210,3 +1473,140 @@ out: return ret; } +static int repair_qgroup_info(struct btrfs_fs_info *info, + struct qgroup_count *count) +{ + int ret; + struct btrfs_root *root = info->quota_root; + struct btrfs_trans_handle *trans; + struct btrfs_path path; + struct btrfs_qgroup_info_item *info_item; + struct btrfs_key key; + + printf("Repair qgroup %llu/%llu\n", btrfs_qgroup_level(count->qgroupid), + btrfs_qgroup_subvid(count->qgroupid)); + + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); + + btrfs_init_path(&path); + key.objectid = 0; + key.type = BTRFS_QGROUP_INFO_KEY; + key.offset = count->qgroupid; + ret = btrfs_search_slot(trans, root, &key, &path, 0, 1); + if (ret) { + error("Could not find disk item for qgroup %llu/%llu.\n", + btrfs_qgroup_level(count->qgroupid), + btrfs_qgroup_subvid(count->qgroupid)); + if (ret > 0) + ret = -ENOENT; + goto out; + } + + info_item = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_qgroup_info_item); + + btrfs_set_qgroup_info_generation(path.nodes[0], info_item, + trans->transid); + + btrfs_set_qgroup_info_referenced(path.nodes[0], info_item, + count->info.referenced); + btrfs_set_qgroup_info_referenced_compressed(path.nodes[0], info_item, + count->info.referenced_compressed); + + btrfs_set_qgroup_info_exclusive(path.nodes[0], info_item, + count->info.exclusive); + btrfs_set_qgroup_info_exclusive_compressed(path.nodes[0], info_item, + count->info.exclusive_compressed); + + btrfs_mark_buffer_dirty(path.nodes[0]); + +out: + btrfs_commit_transaction(trans, root); + btrfs_release_path(&path); + + return ret; +} + +static int repair_qgroup_status(struct btrfs_fs_info *info) +{ + int ret; + struct btrfs_root *root = info->quota_root; + struct btrfs_trans_handle *trans; + struct btrfs_path path; + struct btrfs_key key; + struct btrfs_qgroup_status_item *status_item; + + printf("Repair qgroup status item\n"); + + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); + + btrfs_init_path(&path); + key.objectid = 0; + key.type = BTRFS_QGROUP_STATUS_KEY; + key.offset = 0; + ret = btrfs_search_slot(trans, root, &key, &path, 0, 1); + if (ret) { + error("Could not find qgroup status item\n"); + if (ret > 0) + ret = -ENOENT; + goto out; + } + + status_item = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_qgroup_status_item); + btrfs_set_qgroup_status_flags(path.nodes[0], status_item, + BTRFS_QGROUP_STATUS_FLAG_ON); + btrfs_set_qgroup_status_rescan(path.nodes[0], status_item, 0); + btrfs_set_qgroup_status_generation(path.nodes[0], status_item, + trans->transid); + + btrfs_mark_buffer_dirty(path.nodes[0]); + +out: + btrfs_commit_transaction(trans, root); + btrfs_release_path(&path); + + return ret; +} + +int repair_qgroups(struct btrfs_fs_info *info, int *repaired) +{ + int ret; + struct qgroup_count *count, *tmpcount; + + *repaired = 0; + + if (!repair) + return 0; + + list_for_each_entry_safe(count, tmpcount, &bad_qgroups, bad_list) { + ret = repair_qgroup_info(info, count); + if (ret) { + goto out; + } + + (*repaired)++; + + list_del_init(&count->bad_list); + } + + /* + * Do this step last as we want the latest transaction id on + * our qgroup status to avoid a (useless) warning after + * mount. + */ + if (*repaired || counts.qgroup_inconsist || counts.rescan_running) { + ret = repair_qgroup_status(info); + if (ret) + goto out; + + (*repaired)++; + } + +out: + return ret; +}