btrfs-progs: check: verify qgroups above level 0
authorMark Fasheh <mfasheh@suse.de>
Fri, 17 Jun 2016 20:37:48 +0000 (13:37 -0700)
committerDavid Sterba <dsterba@suse.com>
Wed, 13 Jul 2016 16:44:13 +0000 (18:44 +0200)
At the moment we only check subvolume quota groups (level 0). With this
patch we can check groups above 0, thus verifying the entire qgroup
hierarchy on a file system.  The accounting portion of this patch is modeled
after the kernel - we are essentially reimplementing the 'quota rescan' case
here. Most other sections of this code went unchanged, in particular the
root counting works independently of the accounting.

Signed-off-by: Mark Fasheh <mfasheh@suse.de>
Signed-off-by: David Sterba <dsterba@suse.com>
qgroup-verify.c

index 6ca95eb..43ae11e 100644 (file)
@@ -35,7 +35,8 @@
 /*#define QGROUP_VERIFY_DEBUG*/
 static unsigned long tot_extents_scanned = 0;
 
 /*#define QGROUP_VERIFY_DEBUG*/
 static unsigned long tot_extents_scanned = 0;
 
-static void add_bytes(u64 root_objectid, u64 num_bytes, int exclusive);
+struct qgroup_count;
+static struct qgroup_count *find_count(u64 qgroupid);
 
 struct qgroup_info {
        u64 referenced;
 
 struct qgroup_info {
        u64 referenced;
@@ -54,6 +55,17 @@ struct qgroup_count {
        struct qgroup_info info;
 
        struct rb_node rb_node;
        struct qgroup_info info;
 
        struct rb_node rb_node;
+
+       /* Parents when we are a child group */
+       struct list_head groups;
+
+       /*
+        * Children when we are a parent group (not currently used but
+        * maintained to mirror kernel handling of qgroups)
+        */
+       struct list_head members;
+
+       u64 cur_refcnt;
 };
 
 static struct counts_tree {
 };
 
 static struct counts_tree {
@@ -66,6 +78,39 @@ static struct counts_tree {
 static struct rb_root by_bytenr = RB_ROOT;
 
 /*
 static struct rb_root by_bytenr = RB_ROOT;
 
 /*
+ * Glue structure to represent the relations between qgroups. Mirrored
+ * from kernel.
+ */
+struct btrfs_qgroup_list {
+       struct list_head next_group;
+       struct list_head next_member;
+       struct qgroup_count *group; /* Parent group */
+       struct qgroup_count *member;
+};
+
+/* Allow us to reset ref counts during accounting without zeroing each group. */
+static u64 qgroup_seq = 1ULL;
+
+static inline void update_cur_refcnt(struct qgroup_count *c)
+{
+       if (c->cur_refcnt < qgroup_seq)
+               c->cur_refcnt = qgroup_seq;
+       c->cur_refcnt++;
+}
+
+static inline u64 group_get_cur_refcnt(struct qgroup_count *c)
+{
+       if (c->cur_refcnt < qgroup_seq)
+               return 0;
+       return c->cur_refcnt - qgroup_seq;
+}
+
+static void inc_qgroup_seq(int root_count)
+{
+       qgroup_seq += root_count + 1;
+}
+
+/*
  * List of interior tree blocks. We walk this list after loading the
  * extent tree to resolve implied refs. For each interior node we'll
  * place a shared ref in the ref tree against each child object. This
  * List of interior tree blocks. We walk this list after loading the
  * extent tree to resolve implied refs. For each interior node we'll
  * place a shared ref in the ref tree against each child object. This
@@ -296,10 +341,12 @@ static void find_parent_roots(struct ulist *roots, u64 parent)
        }
 
        do {
        }
 
        do {
-               if (ref->root)
-                       ulist_add(roots, ref->root, 0, 0);
-               else
+               if (ref->root) {
+                       if (is_fstree(ref->root))
+                               ulist_add(roots, ref->root, 0, 0);
+               } else {
                        find_parent_roots(roots, ref->parent);
                        find_parent_roots(roots, ref->parent);
+               }
 
                node = rb_next(node);
                if (node)
 
                node = rb_next(node);
                if (node)
@@ -307,6 +354,114 @@ static void find_parent_roots(struct ulist *roots, u64 parent)
        } while (node && ref->bytenr == parent);
 }
 
        } while (node && ref->bytenr == parent);
 }
 
+static int account_one_extent(struct ulist *roots, u64 bytenr, u64 num_bytes)
+{
+       int ret;
+       u64 id, nr_roots, nr_refs;
+       struct qgroup_count *count;
+       struct ulist *counts = ulist_alloc(0);
+       struct ulist *tmp = ulist_alloc(0);
+       struct ulist_iterator uiter;
+       struct ulist_iterator tmp_uiter;
+       struct ulist_node *unode;
+       struct ulist_node *tmp_unode;
+       struct btrfs_qgroup_list *glist;
+
+       if (!counts || !tmp) {
+               ulist_free(counts);
+               ulist_free(tmp);
+               return ENOMEM;
+       }
+
+       ULIST_ITER_INIT(&uiter);
+       while ((unode = ulist_next(roots, &uiter))) {
+               BUG_ON(unode->val == 0ULL);
+
+               /*
+                * For each root, find their corresponding tracking group and
+                * add it to our qgroups list.
+                */
+               count = find_count(unode->val);
+               if (!count)
+                       continue;
+
+               BUG_ON(!is_fstree(unode->val));
+               ret = ulist_add(counts, count->qgroupid, ptr_to_u64(count), 0);
+               if (ret < 0)
+                       goto out;
+
+               /*
+                * Now we look for parents (and parents of those...). Use a tmp
+                * ulist here to avoid re-walking (and re-incrementing) our
+                * already added items on every loop iteration.
+                */
+               ulist_reinit(tmp);
+               ret = ulist_add(tmp, count->qgroupid, ptr_to_u64(count), 0);
+               if (ret < 0)
+                       goto out;
+
+               ULIST_ITER_INIT(&tmp_uiter);
+               while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
+                       /* Bump the refcount on a node every time we see it. */
+                       count = u64_to_ptr(tmp_unode->aux);
+                       update_cur_refcnt(count);
+
+                       list_for_each_entry(glist, &count->groups, next_group) {
+                               struct qgroup_count *parent;
+                               parent = glist->group;
+                               id = parent->qgroupid;
+
+                               BUG_ON(!count);
+
+                               ret = ulist_add(counts, id, ptr_to_u64(parent),
+                                               0);
+                               if (ret < 0)
+                                       goto out;
+                               ret = ulist_add(tmp, id, ptr_to_u64(parent),
+                                               0);
+                               if (ret < 0)
+                                       goto out;
+                       }
+               }
+       }
+
+       /*
+        * Now that we have gathered up and counted all the groups, we
+        * can add bytes for this ref.
+        */
+       nr_roots = roots->nnodes;
+       ULIST_ITER_INIT(&uiter);
+       while ((unode = ulist_next(counts, &uiter))) {
+               count = u64_to_ptr(unode->aux);
+
+               nr_refs = group_get_cur_refcnt(count);
+               if (nr_refs) {
+                       count->info.referenced += num_bytes;
+                       count->info.referenced_compressed += num_bytes;
+
+                       if (nr_refs == nr_roots) {
+                               count->info.exclusive += num_bytes;
+                               count->info.exclusive_compressed += num_bytes;
+                       }
+               }
+#ifdef QGROUP_VERIFY_DEBUG
+               printf("account (%llu, %llu), qgroup %llu/%llu, rfer %llu,"
+                      " excl %llu, refs %llu, roots %llu\n", bytenr, num_bytes,
+                      btrfs_qgroup_level(count->qgroupid),
+                      btrfs_qgroup_subvid(count->qgroupid),
+                      count->info.referenced, count->info.exclusive, nr_refs,
+                      nr_roots);
+#endif
+       }
+
+       inc_qgroup_seq(roots->nnodes);
+       ret = 0;
+out:
+       ulist_free(counts);
+       ulist_free(tmp);
+       return ret;
+}
+
 static void print_subvol_info(u64 subvolid, u64 bytenr, u64 num_bytes,
                              struct ulist *roots);
 /*
 static void print_subvol_info(u64 subvolid, u64 bytenr, u64 num_bytes,
                              struct ulist *roots);
 /*
@@ -318,18 +473,15 @@ static void print_subvol_info(u64 subvolid, u64 bytenr, u64 num_bytes,
  * - resolve all possible roots for shared refs, insert each
  *   of those into ref_roots ulist (this is a recursive process)
  *
  * - resolve all possible roots for shared refs, insert each
  *   of those into ref_roots ulist (this is a recursive process)
  *
- * - Walk ref_roots ulist, adding extent bytes to each qgroup count that
- *    cooresponds to a found root.
+ * - With all roots resolved we can account the ref - this is done in
+ *   account_one_extent().
  */
  */
-static void account_all_refs(int do_qgroups, u64 search_subvol)
+static int account_all_refs(int do_qgroups, u64 search_subvol)
 {
 {
-       int exclusive;
        struct ref *ref;
        struct rb_node *node;
        u64 bytenr, num_bytes;
        struct ulist *roots = ulist_alloc(0);
        struct ref *ref;
        struct rb_node *node;
        u64 bytenr, num_bytes;
        struct ulist *roots = ulist_alloc(0);
-       struct ulist_iterator uiter;
-       struct ulist_node *unode;
 
        node = rb_first(&by_bytenr);
        while (node) {
 
        node = rb_first(&by_bytenr);
        while (node) {
@@ -347,10 +499,14 @@ static void account_all_refs(int do_qgroups, u64 search_subvol)
                do {
                        BUG_ON(ref->bytenr != bytenr);
                        BUG_ON(ref->num_bytes != num_bytes);
                do {
                        BUG_ON(ref->bytenr != bytenr);
                        BUG_ON(ref->num_bytes != num_bytes);
-                       if (ref->root)
-                               ulist_add(roots, ref->root, 0, 0);
-                       else
+                       if (ref->root) {
+                               if (is_fstree(ref->root)) {
+                                       if (ulist_add(roots, ref->root, 0, 0) < 0)
+                                               goto enomem;
+                               }
+                       } else {
                                find_parent_roots(roots, ref->parent);
                                find_parent_roots(roots, ref->parent);
+                       }
 
                        /*
                         * When we leave this inner loop, node is set
 
                        /*
                         * When we leave this inner loop, node is set
@@ -362,29 +518,22 @@ static void account_all_refs(int do_qgroups, u64 search_subvol)
                                ref = rb_entry(node, struct ref, bytenr_node);
                } while (node && ref->bytenr == bytenr);
 
                                ref = rb_entry(node, struct ref, bytenr_node);
                } while (node && ref->bytenr == bytenr);
 
-               /*
-                * Now that we have all roots, we can properly account
-                * this extent against the corresponding qgroups.
-                */
-               if (roots->nnodes == 1)
-                       exclusive = 1;
-               else
-                       exclusive = 0;
-
                if (search_subvol)
                        print_subvol_info(search_subvol, bytenr, num_bytes,
                                          roots);
 
                if (search_subvol)
                        print_subvol_info(search_subvol, bytenr, num_bytes,
                                          roots);
 
-               ULIST_ITER_INIT(&uiter);
-               while ((unode = ulist_next(roots, &uiter))) {
-                       BUG_ON(unode->val == 0ULL);
-                       /* We only want to account fs trees */
-                       if (is_fstree(unode->val) && do_qgroups)
-                               add_bytes(unode->val, num_bytes, exclusive);
-               }
+               if (!do_qgroups)
+                       continue;
+
+               if (account_one_extent(roots, bytenr, num_bytes))
+                       goto enomem;
        }
 
        ulist_free(roots);
        }
 
        ulist_free(roots);
+       return 0;
+enomem:
+       error("Out of memory while accounting refs for qgroups");
+       return -ENOMEM;
 }
 
 static u64 resolve_one_root(u64 bytenr)
 }
 
 static u64 resolve_one_root(u64 bytenr)
@@ -668,6 +817,8 @@ static struct qgroup_count *alloc_count(struct btrfs_disk_key *key,
                item->exclusive = btrfs_qgroup_info_exclusive(leaf, disk);
                item->exclusive_compressed =
                        btrfs_qgroup_info_exclusive_compressed(leaf, disk);
                item->exclusive = btrfs_qgroup_info_exclusive(leaf, disk);
                item->exclusive_compressed =
                        btrfs_qgroup_info_exclusive_compressed(leaf, disk);
+               INIT_LIST_HEAD(&c->groups);
+               INIT_LIST_HEAD(&c->members);
 
                if (insert_count(c)) {
                        free(c);
 
                if (insert_count(c)) {
                        free(c);
@@ -677,29 +828,30 @@ static struct qgroup_count *alloc_count(struct btrfs_disk_key *key,
        return c;
 }
 
        return c;
 }
 
-static void add_bytes(u64 root_objectid, u64 num_bytes, int exclusive)
+static int add_qgroup_relation(u64 memberid, u64 parentid)
 {
 {
-       struct qgroup_count *count = find_count(root_objectid);
-       struct qgroup_info *qg;
+       struct qgroup_count *member;
+       struct qgroup_count *parent;
+       struct btrfs_qgroup_list *list;
 
 
-       BUG_ON(num_bytes < 4096); /* Random sanity check. */
+       if (memberid > parentid)
+               return 0;
 
 
-       if (!count)
-               return;
+       member = find_count(memberid);
+       parent = find_count(parentid);
+       if (!member || !parent)
+               return -ENOENT;
 
 
-       qg = &count->info;
+       list = calloc(1, sizeof(*list));
+       if (!list)
+               return -ENOMEM;
 
 
-       qg->referenced += num_bytes;
-       /*
-        * count of compressed bytes is unimplemented, so we do the
-        * same as kernel.
-        */
-       qg->referenced_compressed += num_bytes;
+       list->group = parent;
+       list->member = member;
+       list_add_tail(&list->next_group, &member->groups);
+       list_add_tail(&list->next_member, &parent->members);
 
 
-       if (exclusive) {
-               qg->exclusive += num_bytes;
-               qg->exclusive_compressed += num_bytes;
-       }
+       return 0;
 }
 
 static void read_qgroup_status(struct btrfs_path *path,
 }
 
 static void read_qgroup_status(struct btrfs_path *path,
@@ -733,11 +885,18 @@ static int load_quota_info(struct btrfs_fs_info *info)
        struct btrfs_qgroup_info_item *item;
        struct qgroup_count *count;
        int i, nr;
        struct btrfs_qgroup_info_item *item;
        struct qgroup_count *count;
        int i, nr;
+       int search_relations = 0;
 
 
+loop:
+       /*
+        * Do 2 passes, the first allocates group counts and reads status
+        * items. The 2nd pass picks up relation items and glues them to their
+        * respective count structures.
+        */
        btrfs_init_path(&path);
 
        key.offset = 0;
        btrfs_init_path(&path);
 
        key.offset = 0;
-       key.objectid = 0;
+       key.objectid = search_relations ? 0 : BTRFS_QGROUP_RELATION_KEY;
        key.type = 0;
 
        ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
        key.type = 0;
 
        ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
@@ -754,17 +913,26 @@ static int load_quota_info(struct btrfs_fs_info *info)
                        btrfs_item_key(leaf, &disk_key, i);
                        btrfs_disk_key_to_cpu(&key, &disk_key);
 
                        btrfs_item_key(leaf, &disk_key, i);
                        btrfs_disk_key_to_cpu(&key, &disk_key);
 
+                       if (search_relations) {
+                               if (key.type == BTRFS_QGROUP_RELATION_KEY) {
+                                       ret = add_qgroup_relation(key.objectid,
+                                                                 key.offset);
+                                       if (ret) {
+                                               error("out of memory");
+                                               goto out;
+                                       }
+                               }
+                               continue;
+                       }
+
                        if (key.type == BTRFS_QGROUP_STATUS_KEY) {
                                read_qgroup_status(&path, &counts);
                                continue;
                        }
                        if (key.type == BTRFS_QGROUP_STATUS_KEY) {
                                read_qgroup_status(&path, &counts);
                                continue;
                        }
-                       if (key.type == BTRFS_QGROUP_RELATION_KEY)
-                               printf("Ignoring qgroup relation key %llu\n",
-                                      key.objectid);
 
                        /*
 
                        /*
-                        * Ignore: BTRFS_QGROUP_LIMIT_KEY,
-                        *         BTRFS_QGROUP_RELATION_KEY
+                        * At this point, we can ignore anything that
+                        * isn't a qgroup info.
                         */
                        if (key.type != BTRFS_QGROUP_INFO_KEY)
                                continue;
                         */
                        if (key.type != BTRFS_QGROUP_INFO_KEY)
                                continue;
@@ -796,6 +964,12 @@ static int load_quota_info(struct btrfs_fs_info *info)
 
        ret = 0;
        btrfs_release_path(&path);
 
        ret = 0;
        btrfs_release_path(&path);
+
+       if (!search_relations) {
+               search_relations = 1;
+               goto loop;
+       }
+
 out:
        return ret;
 }
 out:
        return ret;
 }
@@ -1040,6 +1214,11 @@ static void print_fields_signed(long long bytes,
               prefix, type, bytes, type, bytes_compressed);
 }
 
               prefix, type, bytes, type, bytes_compressed);
 }
 
+static inline int qgroup_printable(struct qgroup_count *c)
+{
+       return !!(c->subvol_exists || btrfs_qgroup_level(c->qgroupid));
+}
+
 static int report_qgroup_difference(struct qgroup_count *count, int verbose)
 {
        int is_different;
 static int report_qgroup_difference(struct qgroup_count *count, int verbose)
 {
        int is_different;
@@ -1050,9 +1229,10 @@ static int report_qgroup_difference(struct qgroup_count *count, int verbose)
 
        is_different = excl_diff || ref_diff;
 
 
        is_different = excl_diff || ref_diff;
 
-       if (verbose || (is_different && count->subvol_exists)) {
-               printf("Counts for qgroup id: %llu %s\n",
-                      (unsigned long long)count->qgroupid,
+       if (verbose || (is_different && qgroup_printable(count))) {
+               printf("Counts for qgroup id: %llu/%llu %s\n",
+                      btrfs_qgroup_level(count->qgroupid),
+                      btrfs_qgroup_subvid(count->qgroupid),
                       is_different ? "are different" : "");
 
                print_fields(info->referenced, info->referenced_compressed,
                       is_different ? "are different" : "");
 
                print_fields(info->referenced, info->referenced_compressed,
@@ -1104,10 +1284,27 @@ void free_qgroup_counts(void)
 {
        struct rb_node *node;
        struct qgroup_count *c;
 {
        struct rb_node *node;
        struct qgroup_count *c;
+       struct btrfs_qgroup_list *glist, *tmpglist;
+
        node = rb_first(&counts.root);
        while (node) {
                c = rb_entry(node, struct qgroup_count, rb_node);
        node = rb_first(&counts.root);
        while (node) {
                c = rb_entry(node, struct qgroup_count, rb_node);
+
+               list_for_each_entry_safe(glist, tmpglist, &c->groups,
+                                        next_group) {
+                       list_del(&glist->next_group);
+                       list_del(&glist->next_member);
+                       free(glist);
+               }
+               list_for_each_entry_safe(glist, tmpglist, &c->members,
+                                        next_group) {
+                       list_del(&glist->next_group);
+                       list_del(&glist->next_member);
+                       free(glist);
+               }
+
                node = rb_next(node);
                node = rb_next(node);
+
                rb_erase(&c->rb_node, &counts.root);
                free(c);
        }
                rb_erase(&c->rb_node, &counts.root);
                free(c);
        }
@@ -1148,7 +1345,7 @@ int qgroup_verify_all(struct btrfs_fs_info *info)
                goto out;
        }
 
                goto out;
        }
 
-       account_all_refs(1, 0);
+       ret = account_all_refs(1, 0);
 
 out:
        /*
 
 out:
        /*
@@ -1220,7 +1417,7 @@ int print_extent_state(struct btrfs_fs_info *info, u64 subvol)
        }
 
        printf("Offset\t\tLen\tRoot Refs\tRoots\n");
        }
 
        printf("Offset\t\tLen\tRoot Refs\tRoots\n");
-       account_all_refs(0, subvol);
+       ret = account_all_refs(0, subvol);
 
 out:
        free_tree_blocks();
 
 out:
        free_tree_blocks();