btrfs-progs: tests: enable check lowmem in travis CI
[platform/upstream/btrfs-progs.git] / cmds-check.c
index 1d399d7..e746ee7 100644 (file)
@@ -133,6 +133,7 @@ struct data_backref {
 #define DIR_INDEX_MISSING       (1<<18) /* INODE_INDEX not found */
 #define DIR_INDEX_MISMATCH      (1<<19) /* INODE_INDEX found but not match */
 #define DIR_COUNT_AGAIN         (1<<20) /* DIR isize should be recalculated */
+#define BG_ACCOUNTING_ERROR     (1<<21) /* Block group accounting error */
 
 static inline struct data_backref* to_data_backref(struct extent_backref *back)
 {
@@ -1974,10 +1975,15 @@ struct node_refs {
        u64 bytenr[BTRFS_MAX_LEVEL];
        u64 refs[BTRFS_MAX_LEVEL];
        int need_check[BTRFS_MAX_LEVEL];
+       /* field for checking all trees */
+       int checked[BTRFS_MAX_LEVEL];
+       /* the corresponding extent should be marked as full backref or not */
+       int full_backref[BTRFS_MAX_LEVEL];
 };
 
 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
-                            struct node_refs *nrefs, u64 level);
+                            struct extent_buffer *eb, struct node_refs *nrefs,
+                            u64 level, int check_all);
 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
                            unsigned int ext_ref);
 
@@ -2040,9 +2046,8 @@ again:
                if (path->nodes[i]->start == nrefs->bytenr[i])
                        continue;
 
-               ret = update_nodes_refs(root,
-                               path->nodes[i]->start,
-                               nrefs, i);
+               ret = update_nodes_refs(root, path->nodes[i]->start,
+                               path->nodes[i], nrefs, i, 0);
                if (ret)
                        goto out;
 
@@ -2144,7 +2149,12 @@ static int need_check(struct btrfs_root *root, struct ulist *roots)
        struct rb_node *node;
        struct ulist_node *u;
 
-       if (roots->nnodes == 1)
+       /*
+        * @roots can be empty if it belongs to tree reloc tree.
+        * In that case, we should always check the leaf, as we can't use
+        * the tree owner to ensure some other root will check it.
+        */
+       if (roots->nnodes == 1 || roots->nnodes == 0)
                return 1;
 
        node = rb_first(&roots->root);
@@ -2159,25 +2169,143 @@ static int need_check(struct btrfs_root *root, struct ulist *roots)
        return 1;
 }
 
+static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
+                              u64 *flags_ret)
+{
+       struct btrfs_root *extent_root = root->fs_info->extent_root;
+       struct btrfs_root_item *ri = &root->root_item;
+       struct btrfs_extent_inline_ref *iref;
+       struct btrfs_extent_item *ei;
+       struct btrfs_key key;
+       struct btrfs_path *path = NULL;
+       unsigned long ptr;
+       unsigned long end;
+       u64 flags;
+       u64 owner = 0;
+       u64 offset;
+       int slot;
+       int type;
+       int ret = 0;
+
+       /*
+        * Except file/reloc tree, we can not have FULL BACKREF MODE
+        */
+       if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
+               goto normal;
+
+       /* root node */
+       if (eb->start == btrfs_root_bytenr(ri))
+               goto normal;
+
+       if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
+               goto full_backref;
+
+       owner = btrfs_header_owner(eb);
+       if (owner == root->objectid)
+               goto normal;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       key.objectid = btrfs_header_bytenr(eb);
+       key.type = (u8)-1;
+       key.offset = (u64)-1;
+
+       ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
+       if (ret <= 0) {
+               ret = -EIO;
+               goto out;
+       }
+
+       if (ret > 0) {
+               ret = btrfs_previous_extent_item(extent_root, path,
+                                                key.objectid);
+               if (ret)
+                       goto full_backref;
+
+       }
+       btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+       eb = path->nodes[0];
+       slot = path->slots[0];
+       ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
+
+       flags = btrfs_extent_flags(eb, ei);
+       if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+               goto full_backref;
+
+       ptr = (unsigned long)(ei + 1);
+       end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
+
+       if (key.type == BTRFS_EXTENT_ITEM_KEY)
+               ptr += sizeof(struct btrfs_tree_block_info);
+
+next:
+       /* Reached extent item ends normally */
+       if (ptr == end)
+               goto full_backref;
+
+       /* Beyond extent item end, wrong item size */
+       if (ptr > end) {
+               error("extent item at bytenr %llu slot %d has wrong size",
+                       eb->start, slot);
+               goto full_backref;
+       }
+
+       iref = (struct btrfs_extent_inline_ref *)ptr;
+       offset = btrfs_extent_inline_ref_offset(eb, iref);
+       type = btrfs_extent_inline_ref_type(eb, iref);
+
+       if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
+               goto normal;
+       ptr += btrfs_extent_inline_ref_size(type);
+       goto next;
+
+normal:
+       *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
+       goto out;
+
+full_backref:
+       *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
 /*
  * for a tree node or leaf, we record its reference count, so later if we still
  * process this node or leaf, don't need to compute its reference count again.
+ *
+ * @bytenr  if @bytenr == (u64)-1, only update nrefs->full_backref[level]
  */
 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
-                            struct node_refs *nrefs, u64 level)
+                            struct extent_buffer *eb, struct node_refs *nrefs,
+                            u64 level, int check_all)
 {
-       int check, ret;
-       u64 refs;
        struct ulist *roots;
+       u64 refs = 0;
+       u64 flags = 0;
+       int root_level = btrfs_header_level(root->node);
+       int check;
+       int ret;
+
+       if (nrefs->bytenr[level] == bytenr)
+               return 0;
 
-       if (nrefs->bytenr[level] != bytenr) {
+       if (bytenr != (u64)-1) {
+               /* the return value of this function seems a mistake */
                ret = btrfs_lookup_extent_info(NULL, root, bytenr,
-                                      level, 1, &refs, NULL);
-               if (ret < 0)
+                                      level, 1, &refs, &flags);
+               /* temporary fix */
+               if (ret < 0 && !check_all)
                        return ret;
 
                nrefs->bytenr[level] = bytenr;
                nrefs->refs[level] = refs;
+               nrefs->full_backref[level] = 0;
+               nrefs->checked[level] = 0;
+
                if (refs > 1) {
                        ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
                                                   0, &roots);
@@ -2188,13 +2316,58 @@ static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
                        ulist_free(roots);
                        nrefs->need_check[level] = check;
                } else {
-                       nrefs->need_check[level] = 1;
+                       if (!check_all) {
+                               nrefs->need_check[level] = 1;
+                       } else {
+                               if (level == root_level) {
+                                       nrefs->need_check[level] = 1;
+                               } else {
+                                       /*
+                                        * The node refs may not have been
+                                        * updated yet. If the upper node needs
+                                        * checking (the lowest root_objectid),
+                                        * this node can be checked as well.
+                                        */
+                                       nrefs->need_check[level] =
+                                               nrefs->need_check[level + 1];
+                               }
+                       }
                }
        }
 
+       if (check_all && eb) {
+               calc_extent_flag_v2(root, eb, &flags);
+               if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+                       nrefs->full_backref[level] = 1;
+       }
+
        return 0;
 }
 
+/*
+ * @level           if @level == -1, it means an extent data item;
+ *                  otherwise a normal tree block.
+ */
+static int should_check_extent_strictly(struct btrfs_root *root,
+                                       struct node_refs *nrefs, int level)
+{
+       int root_level = btrfs_header_level(root->node);
+
+       if (level > root_level || level < -1)
+               return 1;
+       if (level == root_level)
+               return 1;
+       /*
+        * if the upper node is marked full backref, it should contain shared
+        * backref of the parent (except owner == root->objectid).
+        */
+       while (++level <= root_level)
+               if (nrefs->refs[level] > 1)
+                       return 0;
+
+       return 1;
+}
+
 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
                          struct walk_control *wc, int *level,
                          struct node_refs *nrefs)
@@ -2324,16 +2497,197 @@ out:
        return err;
 }
 
+static int fs_root_objectid(u64 objectid);
+
+/*
+ * Update global fs information.
+ */
+static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
+                        int level)
+{
+       u32 free_nrs;
+       struct extent_buffer *eb = path->nodes[level];
+
+       total_btree_bytes += eb->len;
+       if (fs_root_objectid(root->objectid))
+               total_fs_tree_bytes += eb->len;
+       if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
+               total_extent_tree_bytes += eb->len;
+
+       if (level == 0) {
+               btree_space_waste += btrfs_leaf_free_space(root, eb);
+       } else {
+               free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root) -
+                           btrfs_header_nritems(eb));
+               btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
+       }
+}
+
+/*
+ * This function only handles BACKREF_MISSING.
+ * If the corresponding extent item exists, increase the ref; otherwise insert
+ * an extent item and backref.
+ *
+ * Returns error bits after repair.
+ */
+static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
+                                struct btrfs_root *root,
+                                struct extent_buffer *node,
+                                struct node_refs *nrefs, int level, int err)
+{
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_root *extent_root = fs_info->extent_root;
+       struct btrfs_path path;
+       struct btrfs_extent_item *ei;
+       struct btrfs_tree_block_info *bi;
+       struct btrfs_key key;
+       struct extent_buffer *eb;
+       u32 size = sizeof(*ei);
+       u32 node_size = root->fs_info->nodesize;
+       int insert_extent = 0;
+       int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
+       int root_level = btrfs_header_level(root->node);
+       int generation;
+       int ret;
+       u64 owner;
+       u64 bytenr;
+       u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
+       u64 parent = 0;
+
+       if ((err & BACKREF_MISSING) == 0)
+               return err;
+
+       WARN_ON(level > BTRFS_MAX_LEVEL);
+       WARN_ON(level < 0);
+
+       btrfs_init_path(&path);
+       bytenr = btrfs_header_bytenr(node);
+       owner = btrfs_header_owner(node);
+       generation = btrfs_header_generation(node);
+
+       key.objectid = bytenr;
+       key.type = (u8)-1;
+       key.offset = (u64)-1;
+
+       /* Search for the extent item */
+       ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
+       if (ret <= 0) {
+               ret = -EIO;
+               goto out;
+       }
+
+       ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
+       if (ret)
+               insert_extent = 1;
+
+       /* calculate if the extent item flag is full backref or not */
+       if (nrefs->full_backref[level] != 0)
+               flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
+
+       /* insert an extent item */
+       if (insert_extent) {
+               struct btrfs_disk_key copy_key;
+
+               generation = btrfs_header_generation(node);
+
+               if (level < root_level && nrefs->full_backref[level + 1] &&
+                   owner != root->objectid) {
+                       flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
+               }
+
+               key.objectid = bytenr;
+               if (!skinny_metadata) {
+                       key.type = BTRFS_EXTENT_ITEM_KEY;
+                       key.offset = node_size;
+                       size += sizeof(*bi);
+               } else {
+                       key.type = BTRFS_METADATA_ITEM_KEY;
+                       key.offset = level;
+               }
+
+               btrfs_release_path(&path);
+               ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
+                                             size);
+               if (ret)
+                       goto out;
+
+               eb = path.nodes[0];
+               ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
+
+               btrfs_set_extent_refs(eb, ei, 0);
+               btrfs_set_extent_generation(eb, ei, generation);
+               btrfs_set_extent_flags(eb, ei, flags);
+
+               if (!skinny_metadata) {
+                       bi = (struct btrfs_tree_block_info *)(ei + 1);
+                       memset_extent_buffer(eb, 0, (unsigned long)bi,
+                                            sizeof(*bi));
+                       btrfs_set_disk_key_objectid(&copy_key, root->objectid);
+                       btrfs_set_disk_key_type(&copy_key, 0);
+                       btrfs_set_disk_key_offset(&copy_key, 0);
+
+                       btrfs_set_tree_block_level(eb, bi, level);
+                       btrfs_set_tree_block_key(eb, bi, &copy_key);
+               }
+               btrfs_mark_buffer_dirty(eb);
+               printf("Added an extent item [%llu %u]\n", bytenr, node_size);
+               btrfs_update_block_group(trans, extent_root, bytenr, node_size,
+                                        1, 0);
+
+               nrefs->refs[level] = 0;
+               nrefs->full_backref[level] =
+                       flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
+               btrfs_release_path(&path);
+       }
+
+       if (level < root_level && nrefs->full_backref[level + 1] &&
+           owner != root->objectid)
+               parent = nrefs->bytenr[level + 1];
+
+       /* increase the ref */
+       ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
+                       parent, root->objectid, level, 0);
+
+       nrefs->refs[level]++;
+out:
+       btrfs_release_path(&path);
+       if (ret) {
+               error(
+       "failed to repair tree block ref start %llu root %llu due to %s",
+                     bytenr, root->objectid, strerror(-ret));
+       } else {
+               printf("Added one tree block ref start %llu %s %llu\n",
+                      bytenr, parent ? "parent" : "root",
+                      parent ? parent : root->objectid);
+               err &= ~BACKREF_MISSING;
+       }
+
+       return err;
+}
+
 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
                            unsigned int ext_ref);
+static int check_tree_block_ref(struct btrfs_root *root,
+                               struct extent_buffer *eb, u64 bytenr,
+                               int level, u64 owner, struct node_refs *nrefs);
+static int check_leaf_items(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root, struct btrfs_path *path,
+                           struct node_refs *nrefs, int account_bytes);
 
 /*
+ * @trans      just for lowmem repair mode
+ * @check_all  if not 0, check all tree block backrefs and items;
+ *             if 0, just check the relationship of items in fs tree(s)
+ *
  * Returns >0  Found error, should continue
  * Returns <0  Fatal error, must exit the whole check
  * Returns 0   No errors found
  */
-static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
-                            int *level, struct node_refs *nrefs, int ext_ref)
+static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *root, struct btrfs_path *path,
+                            int *level, struct node_refs *nrefs, int ext_ref,
+                            int check_all)
+
 {
        enum btrfs_tree_block_status status;
        u64 bytenr;
@@ -2342,12 +2696,15 @@ static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
        struct extent_buffer *next;
        struct extent_buffer *cur;
        int ret;
+       int err = 0;
+       int check;
+       int account_file_data = 0;
 
        WARN_ON(*level < 0);
        WARN_ON(*level >= BTRFS_MAX_LEVEL);
 
-       ret = update_nodes_refs(root, path->nodes[*level]->start,
-                               nrefs, *level);
+       ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
+                               path->nodes[*level], nrefs, *level, check_all);
        if (ret < 0)
                return ret;
 
@@ -2355,37 +2712,80 @@ static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
                WARN_ON(*level < 0);
                WARN_ON(*level >= BTRFS_MAX_LEVEL);
                cur = path->nodes[*level];
+               bytenr = btrfs_header_bytenr(cur);
+               check = nrefs->need_check[*level];
 
                if (btrfs_header_level(cur) != *level)
                        WARN_ON(1);
+              /*
+               * Update bytes accounting and check tree block ref
+               * NOTE: Doing accounting and check before checking nritems
+               * is necessary because of empty node/leaf.
+               */
+               if ((check_all && !nrefs->checked[*level]) ||
+                   (!check_all && nrefs->need_check[*level])) {
+                       ret = check_tree_block_ref(root, cur,
+                          btrfs_header_bytenr(cur), btrfs_header_level(cur),
+                          btrfs_header_owner(cur), nrefs);
+
+                       if (repair && ret)
+                               ret = repair_tree_block_ref(trans, root,
+                                   path->nodes[*level], nrefs, *level, ret);
+                       err |= ret;
+
+                       if (check_all && nrefs->need_check[*level] &&
+                               nrefs->refs[*level]) {
+                               account_bytes(root, path, *level);
+                               account_file_data = 1;
+                       }
+                       nrefs->checked[*level] = 1;
+               }
 
                if (path->slots[*level] >= btrfs_header_nritems(cur))
                        break;
+
                /* Don't forgot to check leaf/node validation */
                if (*level == 0) {
-                       ret = btrfs_check_leaf(root, NULL, cur);
-                       if (ret != BTRFS_TREE_BLOCK_CLEAN) {
-                               ret = -EIO;
-                               break;
+                       /* skip duplicate check */
+                       if (check || !check_all) {
+                               ret = btrfs_check_leaf(root, NULL, cur);
+                               if (ret != BTRFS_TREE_BLOCK_CLEAN) {
+                                       err |= -EIO;
+                                       break;
+                               }
                        }
-                       ret = process_one_leaf_v2(root, path, nrefs,
-                                                 level, ext_ref);
-                       cur = path->nodes[*level];
+
+                       ret = 0;
+                       if (!check_all)
+                               ret = process_one_leaf_v2(root, path, nrefs,
+                                                         level, ext_ref);
+                       else
+                               ret = check_leaf_items(trans, root, path,
+                                              nrefs, account_file_data);
+                       err |= ret;
                        break;
                } else {
-                       ret = btrfs_check_node(root, NULL, cur);
-                       if (ret != BTRFS_TREE_BLOCK_CLEAN) {
-                               ret = -EIO;
-                               break;
+                       if (check || !check_all) {
+                               ret = btrfs_check_node(root, NULL, cur);
+                               if (ret != BTRFS_TREE_BLOCK_CLEAN) {
+                                       err |= -EIO;
+                                       break;
+                               }
                        }
                }
+
                bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
                ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
 
-               ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
-               if (ret)
+               ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
+                                       check_all);
+               if (ret < 0)
                        break;
-               if (!nrefs->need_check[*level - 1]) {
+               /*
+                * check all trees in check_chunks_and_extent_v2
+                * check shared node once in check_fs_roots
+                */
+               if (!check_all && !nrefs->need_check[*level - 1]) {
                        path->slots[*level]++;
                        continue;
                }
@@ -2402,16 +2802,15 @@ static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
                                                      &node_key,
                                                      path->slots[*level]);
                                btrfs_add_corrupt_extent_record(fs_info,
-                                               &node_key,
-                                               path->nodes[*level]->start,
-                                               fs_info->nodesize,
-                                               *level);
-                               ret = -EIO;
+                                       &node_key, path->nodes[*level]->start,
+                                       fs_info->nodesize, *level);
+                               err |= -EIO;
                                break;
                        }
                }
 
                ret = check_child_node(cur, path->slots[*level], next);
+               err |= ret;
                if (ret < 0) 
                        break;
 
@@ -2421,7 +2820,7 @@ static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
                        status = btrfs_check_node(root, NULL, next);
                if (status != BTRFS_TREE_BLOCK_CLEAN) {
                        free_extent_buffer(next);
-                       ret = -EIO;
+                       err |= -EIO;
                        break;
                }
 
@@ -2429,8 +2828,11 @@ static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
                free_extent_buffer(path->nodes[*level]);
                path->nodes[*level] = next;
                path->slots[*level] = 0;
+               account_file_data = 0;
+
+               update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
        }
-       return ret;
+       return err;
 }
 
 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
@@ -5470,8 +5872,9 @@ static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
                                                *end, fkey->offset - *end);
                if (!repair || ret) {
                        err |= FILE_EXTENT_ERROR;
-                       error("root %llu EXTENT_DATA[%llu %llu] interrupt",
-                             root->objectid, fkey->objectid, fkey->offset);
+                       error(
+               "root %llu EXTENT_DATA[%llu %llu] interrupt, should start at %llu",
+                       root->objectid, fkey->objectid, fkey->offset, *end);
                }
        }
 
@@ -6143,15 +6546,21 @@ static struct data_backref *find_data_backref(struct extent_record *rec,
        return back;
 }
 /*
- * Iterate all item on the tree and call check_inode_item() to check.
+ * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
+ * blocks and integrity of fs tree items.
  *
- * @root:      the root of the tree to be checked.
- * @ext_ref:   the EXTENDED_IREF feature
- *
- * Return 0 if no error found.
- * Return <0 for error.
+ * @root:         the root of the tree to be checked.
+ * @ext_ref       whether feature EXTENDED_IREF is enabled or not.
+ * @check_all     if not 0, also check the tree blocks of the tree;
+ *                otherwise only check fs tree(s) items relationship, and
+ *                @root MUST be a fs tree root.
+ * Returns 0      represents OK.
+ * Returns not 0  represents error.
  */
-static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
+static int check_btrfs_root(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root, unsigned int ext_ref,
+                           int check_all)
+
 {
        struct btrfs_path path;
        struct node_refs nrefs;
@@ -6160,18 +6569,20 @@ static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
        int level;
        int err = 0;
 
-       /*
-        * We need to manually check the first inode item(256)
-        * As the following traversal function will only start from
-        * the first inode item in the leaf, if inode item(256) is missing
-        * we will just skip it forever.
-        */
-       ret = check_fs_first_inode(root, ext_ref);
-       if (ret < 0)
-               return ret;
-       err |= !!ret;
-
        memset(&nrefs, 0, sizeof(nrefs));
+       if (!check_all) {
+               /*
+                * We need to manually check the first inode item (256)
+                * As the following traversal function will only start from
+                * the first inode item in the leaf, if inode item (256) is
+                * missing we will skip it forever.
+                */
+               ret = check_fs_first_inode(root, ext_ref);
+               if (ret < 0)
+                       return ret;
+       }
+
+
        level = btrfs_header_level(root->node);
        btrfs_init_path(&path);
 
@@ -6193,7 +6604,9 @@ static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
        }
 
        while (1) {
-               ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
+               ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
+                                       ext_ref, check_all);
+
                err |= !!ret;
 
                /* if ret is negative, walk shall stop */
@@ -6216,10 +6629,25 @@ out:
 }
 
 /*
- * Find the relative ref for root_ref and root_backref.
+ * Iterate all items in the tree and call check_inode_item() to check.
  *
- * @root:      the root of the root tree.
- * @ref_key:   the key of the root ref.
+ * @root:      the root of the tree to be checked.
+ * @ext_ref:   the EXTENDED_IREF feature
+ *
+ * Return 0 if no error found.
+ * Return <0 for error.
+ */
+static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
+{
+       reset_cached_block_groups(root->fs_info);
+       return check_btrfs_root(NULL, root, ext_ref, 0);
+}
+
+/*
+ * Find the relative ref for root_ref and root_backref.
+ *
+ * @root:      the root of the root tree.
+ * @ref_key:   the key of the root ref.
  *
  * Return 0 if no error occurred.
  */
@@ -10405,6 +10833,7 @@ static int check_extent_refs(struct btrfs_root *root,
        struct cache_extent *cache;
        int ret = 0;
        int had_dups = 0;
+       int err = 0;
 
        if (repair) {
                /*
@@ -10548,6 +10977,7 @@ static int check_extent_refs(struct btrfs_root *root,
                        cur_err = 1;
                }
 
+               err = cur_err;
                remove_cache_extent(extent_cache, cache);
                free_all_extent_backrefs(rec);
                if (!init_extent_tree && repair && (!cur_err || fix))
@@ -10580,7 +11010,10 @@ repair_abort:
                }
                return ret;
        }
-       return 0;
+
+       if (err)
+               err = -EIO;
+       return err;
 }
 
 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
@@ -10809,6 +11242,66 @@ static int check_device_used(struct device_record *dev_rec,
        }
 }
 
+/*
+ * Extra (optional) check for dev_item size to report possible problems on a
+ * newer kernel.
+ */
+static void check_dev_size_alignment(u64 devid, u64 total_bytes, u32 sectorsize)
+{
+       if (!IS_ALIGNED(total_bytes, sectorsize)) {
+               warning(
+"unaligned total_bytes detected for devid %llu, have %llu should be aligned to %u",
+                       devid, total_bytes, sectorsize);
+               warning(
+"this is OK for older kernel, but may cause kernel warning for newer kernels");
+               warning("this can be fixed by 'btrfs rescue fix-device-size'");
+       }
+}
+
+/*
+ * Unlike device size alignment check above, some super total_bytes check
+ * failure can lead to mount failure for newer kernel.
+ *
+ * So this function will return the error for a fatal super total_bytes problem.
+ */
+static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_device *dev;
+       struct list_head *dev_list = &fs_info->fs_devices->devices;
+       u64 total_bytes = 0;
+       u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
+
+       list_for_each_entry(dev, dev_list, dev_list)
+               total_bytes += dev->total_bytes;
+
+       /* Important check, which can cause unmountable fs */
+       if (super_bytes < total_bytes) {
+               error("super total bytes %llu smaller than real device(s) size %llu",
+                       super_bytes, total_bytes);
+               error("mounting this fs may fail for newer kernels");
+               error("this can be fixed by 'btrfs rescue fix-device-size'");
+               return false;
+       }
+
+       /*
+        * Optional check, just to make everything aligned and match with each
+        * other.
+        *
+        * For a btrfs-image restored fs, we don't need to check it anyway.
+        */
+       if (btrfs_super_flags(fs_info->super_copy) &
+           (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
+               return true;
+       if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
+           !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
+           super_bytes != total_bytes) {
+               warning("minor unaligned/mismatch device size detected");
+               warning(
+               "recommended to use 'btrfs rescue fix-device-size' to fix it");
+       }
+       return true;
+}
+
 /* check btrfs_dev_item -> btrfs_dev_extent */
 static int check_devices(struct rb_root *dev_cache,
                         struct device_extent_tree *dev_extent_cache)
@@ -10826,6 +11319,8 @@ static int check_devices(struct rb_root *dev_cache,
                if (err)
                        ret = err;
 
+               check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
+                                        global_info->sectorsize);
                dev_node = rb_next(dev_node);
        }
        list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
@@ -11151,6 +11646,29 @@ loop:
        goto again;
 }
 
+/*
+ * Validate the type of the inline ref @iref stored in @eb.
+ *
+ * Returns 0 for the four known ref types.  For any other type the extent
+ * @key is reported and UNKNOWN_TYPE is returned.
+ */
+static int check_extent_inline_ref(struct extent_buffer *eb,
+                  struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
+{
+       u8 ref_type = btrfs_extent_inline_ref_type(eb, iref);
+
+       if (ref_type == BTRFS_TREE_BLOCK_REF_KEY ||
+           ref_type == BTRFS_EXTENT_DATA_REF_KEY ||
+           ref_type == BTRFS_SHARED_BLOCK_REF_KEY ||
+           ref_type == BTRFS_SHARED_DATA_REF_KEY)
+               return 0;
+
+       error("extent[%llu %u %llu] has unknown ref type: %d",
+             key->objectid, key->type, key->offset, ref_type);
+       return UNKNOWN_TYPE;
+}
+
 /*
  * Check backrefs of a tree block given by @bytenr or @eb.
  *
@@ -11165,7 +11683,7 @@ loop:
  */
 static int check_tree_block_ref(struct btrfs_root *root,
                                struct extent_buffer *eb, u64 bytenr,
-                               int level, u64 owner)
+                               int level, u64 owner, struct node_refs *nrefs)
 {
        struct btrfs_key key;
        struct btrfs_root *extent_root = root->fs_info->extent_root;
@@ -11177,18 +11695,16 @@ static int check_tree_block_ref(struct btrfs_root *root,
        unsigned long ptr;
        int slot;
        int skinny_level;
+       int root_level = btrfs_header_level(root->node);
        int type;
        u32 nodesize = root->fs_info->nodesize;
        u32 item_size;
        u64 offset;
-       int tree_reloc_root = 0;
        int found_ref = 0;
        int err = 0;
        int ret;
-
-       if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
-           btrfs_header_bytenr(root->node) == bytenr)
-               tree_reloc_root = 1;
+       int strict = 1;
+       int parent = 0;
 
        btrfs_init_path(&path);
        key.objectid = bytenr;
@@ -11227,10 +11743,19 @@ static int check_tree_block_ref(struct btrfs_root *root,
                iref = (struct btrfs_extent_inline_ref *)(info + 1);
        }
 
+
        if (eb) {
                u64 header_gen;
                u64 extent_gen;
 
+               /*
+                * Due to the feature of shared tree blocks, if the upper node
+                * is a fs root or shared node, the extent of checked node may
+                * not be updated until the next CoW.
+                */
+               if (nrefs)
+                       strict = should_check_extent_strictly(root, nrefs,
+                                       level);
                if (!(btrfs_extent_flags(leaf, ei) &
                      BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
                        error(
@@ -11268,25 +11793,42 @@ static int check_tree_block_ref(struct btrfs_root *root,
        item_size = btrfs_item_size_nr(leaf, slot);
        ptr = (unsigned long)iref;
        end = (unsigned long)ei + item_size;
+
        while (ptr < end) {
                iref = (struct btrfs_extent_inline_ref *)ptr;
                type = btrfs_extent_inline_ref_type(leaf, iref);
                offset = btrfs_extent_inline_ref_offset(leaf, iref);
 
-               if (type == BTRFS_TREE_BLOCK_REF_KEY &&
-                       (offset == root->objectid || offset == owner)) {
-                       found_ref = 1;
+               ret = check_extent_inline_ref(leaf, &key, iref);
+               if (ret) {
+                       err |= ret;
+                       break;
+               }
+               if (type == BTRFS_TREE_BLOCK_REF_KEY) {
+                       if (offset == root->objectid)
+                               found_ref = 1;
+                       if (!strict && owner == offset)
+                               found_ref = 1;
                } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
                        /*
                         * Backref of tree reloc root points to itself, no need
                         * to check backref any more.
+                        *
+                        * This may be an error of loop backref, but extent tree
+                        * checker should have already handled it.
+                        * Here we only need to avoid infinite iteration.
                         */
-                       if (tree_reloc_root)
+                       if (offset == bytenr) {
                                found_ref = 1;
-                       else
-                       /* Check if the backref points to valid referencer */
-                               found_ref = !check_tree_block_ref(root, NULL,
-                                               offset, level + 1, owner);
+                       } else {
+                               /*
+                                * Check if the backref points to valid
+                                * referencer
+                                */
+                               found_ref = !check_tree_block_ref( root, NULL,
+                                               offset, level + 1, owner,
+                                               NULL);
+                       }
                }
 
                if (found_ref)
@@ -11308,13 +11850,157 @@ static int check_tree_block_ref(struct btrfs_root *root,
                if (!ret)
                        found_ref = 1;
        }
+       /*
+        * Finally check SHARED BLOCK REF, any found will be good
+        * Here we're not doing comprehensive extent backref checking,
+        * only need to ensure there is some extent referring to this
+        * tree block.
+        */
+       if (!found_ref) {
+               btrfs_release_path(&path);
+               key.objectid = bytenr;
+               key.type = BTRFS_SHARED_BLOCK_REF_KEY;
+               key.offset = (u64)-1;
+
+               ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
+               if (ret < 0) {
+                       err |= BACKREF_MISSING;
+                       goto out;
+               }
+               ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
+               if (ret) {
+                       err |= BACKREF_MISSING;
+                       goto out;
+               }
+               found_ref = 1;
+       }
        if (!found_ref)
                err |= BACKREF_MISSING;
 out:
        btrfs_release_path(&path);
+       if (nrefs && strict &&
+           level < root_level && nrefs->full_backref[level + 1])
+               parent = nrefs->bytenr[level + 1];
        if (eb && (err & BACKREF_MISSING))
-               error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
-                       bytenr, nodesize, owner, level);
+               error(
+       "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
+                     bytenr, nodesize, owner, level,
+                     parent ? "parent" : "root",
+                     parent ? parent : root->objectid);
+       return err;
+}
+
+/*
+ * If @err contains BACKREF_MISSING then add the missing extent backref for
+ * the file extent item @pathp points at.  When the extent item itself does
+ * not exist in the extent tree, it is inserted first (with 0 refs) and the
+ * block group accounting is updated.
+ *
+ * Inline extents, holes (disk_bytenr == 0) and @err values without
+ * BACKREF_MISSING are left untouched.
+ *
+ * Returns error bits after repair: BACKREF_MISSING is cleared only when the
+ * backref was added successfully.
+ */
+static int repair_extent_data_item(struct btrfs_trans_handle *trans,
+                                  struct btrfs_root *root,
+                                  struct btrfs_path *pathp,
+                                  struct node_refs *nrefs,
+                                  int err)
+{
+       struct btrfs_file_extent_item *fi;
+       struct btrfs_key fi_key;
+       struct btrfs_key key;
+       struct btrfs_extent_item *ei;
+       struct btrfs_path path;
+       struct btrfs_root *extent_root = root->fs_info->extent_root;
+       struct extent_buffer *eb;
+       struct extent_buffer *extent_leaf;
+       u64 size;
+       u64 disk_bytenr;
+       u64 num_bytes;
+       u64 parent;
+       u64 offset;
+       u64 extent_offset;
+       u64 file_offset;
+       u64 generation;
+       int slot;
+       int ret = 0;
+
+       eb = pathp->nodes[0];
+       slot = pathp->slots[0];
+       btrfs_item_key_to_cpu(eb, &fi_key, slot);
+       fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
+
+       if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
+           btrfs_file_extent_disk_bytenr(eb, fi) == 0)
+               return err;
+
+       file_offset = fi_key.offset;
+       generation = btrfs_file_extent_generation(eb, fi);
+       disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
+       num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
+       extent_offset = btrfs_file_extent_offset(eb, fi);
+       offset = file_offset - extent_offset;
+
+       /* now repair only adds backref */
+       if ((err & BACKREF_MISSING) == 0)
+               return err;
+
+       /* search extent item */
+       key.objectid = disk_bytenr;
+       key.type = BTRFS_EXTENT_ITEM_KEY;
+       key.offset = num_bytes;
+
+       btrfs_init_path(&path);
+       ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
+       if (ret < 0) {
+               ret = -EIO;
+               goto out;
+       }
+
+       /* insert an extent item */
+       if (ret > 0) {
+               size = sizeof(*ei);
+
+               btrfs_release_path(&path);
+               ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
+                                             size);
+               if (ret)
+                       goto out;
+               /*
+                * Use a dedicated variable for the extent tree leaf: @eb must
+                * keep pointing at the fs tree leaf, it is needed below to
+                * compute the parent of a full backref.
+                */
+               extent_leaf = path.nodes[0];
+               ei = btrfs_item_ptr(extent_leaf, path.slots[0],
+                                   struct btrfs_extent_item);
+
+               btrfs_set_extent_refs(extent_leaf, ei, 0);
+               btrfs_set_extent_generation(extent_leaf, ei, generation);
+               btrfs_set_extent_flags(extent_leaf, ei, BTRFS_EXTENT_FLAG_DATA);
+
+               btrfs_mark_buffer_dirty(extent_leaf);
+               ret = btrfs_update_block_group(trans, extent_root, disk_bytenr,
+                                              num_bytes, 1, 0);
+               if (ret)
+                       goto out;
+       }
+       /* The extent tree path is not needed any more, found or inserted */
+       btrfs_release_path(&path);
+
+       /* Parent is the leaf holding the file extent item, taken from @pathp */
+       if (nrefs->full_backref[0])
+               parent = btrfs_header_bytenr(eb);
+       else
+               parent = 0;
+
+       ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
+                                  root->objectid,
+                  parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
+                                  offset);
+       if (ret) {
+               error(
+               "failed to increase extent data backref[%llu %llu] root %llu",
+                     disk_bytenr, num_bytes, root->objectid);
+               goto out;
+       } else {
+               printf("Add one extent data backref [%llu %llu]\n",
+                      disk_bytenr, num_bytes);
+       }
+
+       err &= ~BACKREF_MISSING;
+out:
+       /* Error paths may still hold the extent tree path */
+       btrfs_release_path(&path);
+       if (ret)
+               error("can't repair root %llu extent data item[%llu %llu]",
+                     root->objectid, disk_bytenr, num_bytes);
        return err;
 }
 
@@ -11325,9 +12011,11 @@ out:
  * Return 0 for no error found
  */
 static int check_extent_data_item(struct btrfs_root *root,
-                                 struct extent_buffer *eb, int slot)
+                                 struct btrfs_path *pathp,
+                                 struct node_refs *nrefs,  int account_bytes)
 {
        struct btrfs_file_extent_item *fi;
+       struct extent_buffer *eb = pathp->nodes[0];
        struct btrfs_path path;
        struct btrfs_root *extent_root = root->fs_info->extent_root;
        struct btrfs_key fi_key;
@@ -11341,14 +12029,16 @@ static int check_extent_data_item(struct btrfs_root *root,
        u64 disk_num_bytes;
        u64 extent_num_bytes;
        u64 extent_flags;
+       u64 offset;
        u32 item_size;
        unsigned long end;
        unsigned long ptr;
        int type;
-       u64 ref_root;
        int found_dbackref = 0;
+       int slot = pathp->slots[0];
        int err = 0;
        int ret;
+       int strict;
 
        btrfs_item_key_to_cpu(eb, &fi_key, slot);
        fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
@@ -11361,6 +12051,7 @@ static int check_extent_data_item(struct btrfs_root *root,
        disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
        disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
        extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
+       offset = btrfs_file_extent_offset(eb, fi);
 
        /* Check unaligned disk_num_bytes and num_bytes */
        if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
@@ -11369,7 +12060,7 @@ static int check_extent_data_item(struct btrfs_root *root,
                        fi_key.objectid, fi_key.offset, disk_num_bytes,
                        root->fs_info->sectorsize);
                err |= BYTES_UNALIGNED;
-       } else {
+       } else if (account_bytes) {
                data_bytes_allocated += disk_num_bytes;
        }
        if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
@@ -11378,7 +12069,7 @@ static int check_extent_data_item(struct btrfs_root *root,
                        fi_key.objectid, fi_key.offset, extent_num_bytes,
                        root->fs_info->sectorsize);
                err |= BYTES_UNALIGNED;
-       } else {
+       } else if (account_bytes) {
                data_bytes_referenced += extent_num_bytes;
        }
        owner = btrfs_header_owner(eb);
@@ -11412,19 +12103,39 @@ static int check_extent_data_item(struct btrfs_root *root,
        iref = (struct btrfs_extent_inline_ref *)(ei + 1);
        ptr = (unsigned long)iref;
        end = (unsigned long)ei + item_size;
+       strict = should_check_extent_strictly(root, nrefs, -1);
+
        while (ptr < end) {
+               u64 ref_root;
+               u64 ref_objectid;
+               u64 ref_offset;
+               bool match = false;
+
                iref = (struct btrfs_extent_inline_ref *)ptr;
                type = btrfs_extent_inline_ref_type(leaf, iref);
                dref = (struct btrfs_extent_data_ref *)(&iref->offset);
 
+               ret = check_extent_inline_ref(leaf, &dbref_key, iref);
+               if (ret) {
+                       err |= ret;
+                       break;
+               }
                if (type == BTRFS_EXTENT_DATA_REF_KEY) {
                        ref_root = btrfs_extent_data_ref_root(leaf, dref);
-                       if (ref_root == owner || ref_root == root->objectid)
+                       ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
+                       ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
+
+                       if (ref_objectid == fi_key.objectid &&
+                           ref_offset == fi_key.offset - offset)
+                               match = true;
+                       if (ref_root == root->objectid && match)
+                               found_dbackref = 1;
+                       else if (!strict && owner == ref_root && match)
                                found_dbackref = 1;
                } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
                        found_dbackref = !check_tree_block_ref(root, NULL,
                                btrfs_extent_inline_ref_offset(leaf, iref),
-                               0, owner);
+                               0, owner, NULL);
                }
 
                if (found_dbackref)
@@ -11439,7 +12150,7 @@ static int check_extent_data_item(struct btrfs_root *root,
                dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
                dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
                dbref_key.offset = hash_extent_data_ref(root->objectid,
-                               fi_key.objectid, fi_key.offset);
+                               fi_key.objectid, fi_key.offset - offset);
 
                ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
                                        &dbref_key, &path, 0, 0);
@@ -11796,7 +12507,8 @@ static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
                leaf = path.nodes[0];
                slot = path.slots[0];
 
-               if (slot >= btrfs_header_nritems(leaf))
+               if (slot >= btrfs_header_nritems(leaf) ||
+                   btrfs_header_owner(leaf) != root_id)
                        goto next;
                btrfs_item_key_to_cpu(leaf, &key, slot);
                if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
@@ -11806,11 +12518,17 @@ static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
                 * Except normal disk bytenr and disk num bytes, we still
                 * need to do extra check on dbackref offset as
                 * dbackref offset = file_offset - file_extent_offset
+                *
+                * Also, we must check the leaf owner.
+                * In case of shared tree blocks (snapshots) we can inherit
+                * leaves from source snapshot.
+                * In that case, reference from source snapshot should not
+                * count.
                 */
                if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
                    btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
                    (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
-                   offset)
+                   offset && btrfs_header_owner(leaf) == root_id)
                        found_count++;
 
 next:
@@ -11873,24 +12591,77 @@ out:
 }
 
 /*
+ * Only delete backref if REFERENCER_MISSING now
+ *
+ * Returns <0   the extent was deleted
+ * Returns >0   the backref was deleted but extent still exists, returned value
+ *               means error after repair
+ * Returns  0   nothing happened
+ */
+static int repair_extent_item(struct btrfs_trans_handle *trans,
+                     struct btrfs_root *root, struct btrfs_path *path,
+                     u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
+                     u64 owner, u64 offset, int err)
+{
+       struct btrfs_key old_key;
+       int freed = 0;
+       int ret;
+
+       btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
+
+       if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
+               /* delete the backref */
+               ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
+                         num_bytes, parent, root_objectid, owner, offset);
+               if (!ret) {
+                       freed = 1;
+                       err &= ~REFERENCER_MISSING;
+                       printf("Delete backref in extent [%llu %llu]\n",
+                              bytenr, num_bytes);
+               } else {
+                       error("fail to delete backref in extent [%llu %llu]",
+                              bytenr, num_bytes);
+               }
+       }
+
+       /* btrfs_free_extent may delete the extent */
+       btrfs_release_path(path);
+       ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
+
+       if (ret)
+               ret = -ENOENT;
+       else if (freed)
+               ret = err;
+       return ret;
+}
+
+/*
  * This function will check a given extent item, including its backref and
  * itself (like crossing stripe boundary and type)
  *
  * Since we don't use extent_record anymore, introduce new error bit
  */
-static int check_extent_item(struct btrfs_fs_info *fs_info,
-                            struct extent_buffer *eb, int slot)
+static int check_extent_item(struct btrfs_trans_handle *trans,
+                            struct btrfs_fs_info *fs_info,
+                            struct btrfs_path *path)
 {
        struct btrfs_extent_item *ei;
        struct btrfs_extent_inline_ref *iref;
        struct btrfs_extent_data_ref *dref;
+       struct extent_buffer *eb = path->nodes[0];
        unsigned long end;
        unsigned long ptr;
+       int slot = path->slots[0];
        int type;
        u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
        u32 item_size = btrfs_item_size_nr(eb, slot);
        u64 flags;
        u64 offset;
+       u64 parent;
+       u64 num_bytes;
+       u64 root_objectid;
+       u64 owner;
+       u64 owner_offset;
        int metadata = 0;
        int level;
        struct btrfs_key key;
@@ -11898,10 +12669,13 @@ static int check_extent_item(struct btrfs_fs_info *fs_info,
        int err = 0;
 
        btrfs_item_key_to_cpu(eb, &key, slot);
-       if (key.type == BTRFS_EXTENT_ITEM_KEY)
+       if (key.type == BTRFS_EXTENT_ITEM_KEY) {
                bytes_used += key.offset;
-       else
+               num_bytes = key.offset;
+       } else {
                bytes_used += nodesize;
+               num_bytes = nodesize;
+       }
 
        if (item_size < sizeof(*ei)) {
                /*
@@ -11953,42 +12727,63 @@ next:
                goto out;
        }
 
+       parent = 0;
+       root_objectid = 0;
+       owner = 0;
+       owner_offset = 0;
        /* Now check every backref in this extent item */
        iref = (struct btrfs_extent_inline_ref *)ptr;
        type = btrfs_extent_inline_ref_type(eb, iref);
        offset = btrfs_extent_inline_ref_offset(eb, iref);
        switch (type) {
        case BTRFS_TREE_BLOCK_REF_KEY:
+               root_objectid = offset;
+               owner = level;
                ret = check_tree_block_backref(fs_info, offset, key.objectid,
                                               level);
                err |= ret;
                break;
        case BTRFS_SHARED_BLOCK_REF_KEY:
+               parent = offset;
                ret = check_shared_block_backref(fs_info, offset, key.objectid,
                                                 level);
                err |= ret;
                break;
        case BTRFS_EXTENT_DATA_REF_KEY:
                dref = (struct btrfs_extent_data_ref *)(&iref->offset);
-               ret = check_extent_data_backref(fs_info,
-                               btrfs_extent_data_ref_root(eb, dref),
-                               btrfs_extent_data_ref_objectid(eb, dref),
-                               btrfs_extent_data_ref_offset(eb, dref),
-                               key.objectid, key.offset,
-                               btrfs_extent_data_ref_count(eb, dref));
+               root_objectid = btrfs_extent_data_ref_root(eb, dref);
+               owner = btrfs_extent_data_ref_objectid(eb, dref);
+               owner_offset = btrfs_extent_data_ref_offset(eb, dref);
+               ret = check_extent_data_backref(fs_info, root_objectid, owner,
+                                       owner_offset, key.objectid, key.offset,
+                                       btrfs_extent_data_ref_count(eb, dref));
                err |= ret;
                break;
        case BTRFS_SHARED_DATA_REF_KEY:
+               parent = offset;
                ret = check_shared_data_backref(fs_info, offset, key.objectid);
                err |= ret;
                break;
        default:
                error("extent[%llu %d %llu] has unknown ref type: %d",
                        key.objectid, key.type, key.offset, type);
-               err |= UNKNOWN_TYPE;
+               ret = UNKNOWN_TYPE;
+               err |= ret;
                goto out;
        }
 
+       if (err && repair) {
+               ret = repair_extent_item(trans, fs_info->extent_root, path,
+                        key.objectid, num_bytes, parent, root_objectid,
+                        owner, owner_offset, ret);
+               if (ret < 0)
+                       goto out;
+               if (ret) {
+                       goto next;
+                       err = ret;
+               }
+       }
+
        ptr += btrfs_extent_inline_ref_size(type);
        goto next;
 
@@ -12071,6 +12866,7 @@ static int check_dev_item(struct btrfs_fs_info *fs_info,
        struct btrfs_path path;
        struct btrfs_key key;
        struct btrfs_dev_extent *ptr;
+       u64 total_bytes;
        u64 dev_id;
        u64 used;
        u64 total = 0;
@@ -12079,6 +12875,7 @@ static int check_dev_item(struct btrfs_fs_info *fs_info,
        dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
        dev_id = btrfs_device_id(eb, dev_item);
        used = btrfs_device_bytes_used(eb, dev_item);
+       total_bytes = btrfs_device_total_bytes(eb, dev_item);
 
        key.objectid = dev_id;
        key.type = BTRFS_DEV_EXTENT_KEY;
@@ -12123,6 +12920,8 @@ next:
                        BTRFS_DEV_EXTENT_KEY, dev_id);
                return ACCOUNTING_MISMATCH;
        }
+       check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
+
        return 0;
 }
 
@@ -12250,8 +13049,51 @@ out:
                error(
                "block group[%llu %llu] used %llu but extent items used %llu",
                        bg_key.objectid, bg_key.offset, used, total);
-               err |= ACCOUNTING_MISMATCH;
+               err |= BG_ACCOUNTING_ERROR;
+       }
+       return err;
+}
+
+/*
+ * Add the block group item matching the chunk item at @path when @err
+ * contains REFERENCER_MISSING.  Items that are not chunk items are ignored.
+ * FIXME: We still need to repair error of dev_item.
+ *
+ * Returns error after repair: REFERENCER_MISSING is cleared only when the
+ * block group was created successfully.
+ */
+static int repair_chunk_item(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *chunk_root,
+                            struct btrfs_path *path, int err)
+{
+       struct extent_buffer *leaf = path->nodes[0];
+       struct btrfs_chunk *chunk;
+       struct btrfs_key ckey;
+       u64 chunk_len;
+       u64 chunk_type;
+
+       btrfs_item_key_to_cpu(leaf, &ckey, path->slots[0]);
+       if (ckey.type != BTRFS_CHUNK_ITEM_KEY)
+               return err;
+
+       chunk = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_chunk);
+       chunk_type = btrfs_chunk_type(leaf, chunk);
+       chunk_len = btrfs_chunk_length(leaf, chunk);
+
+       /* Only a missing block group item is handled here */
+       if (!(err & REFERENCER_MISSING))
+               return err;
+
+       if (btrfs_make_block_group(trans, chunk_root->fs_info, 0, chunk_type,
+                                  ckey.objectid, ckey.offset, chunk_len)) {
+               error("fail to add block group item[%llu %llu]",
+                     ckey.offset, chunk_len);
+               return err;
        }
+
+       err &= ~REFERENCER_MISSING;
+       printf("Added block group item[%llu %llu]\n", ckey.offset, chunk_len);
        return err;
 }
 
@@ -12360,30 +13202,83 @@ out:
        return err;
 }
 
+/*
+ * Delete the item @path currently points at from @root.
+ *
+ * The key of the item is saved, @path is released and the key is searched
+ * again with @trans so that the item can be removed.  After a successful
+ * deletion @path is stepped back by one slot, or btrfs_prev_leaf() is called
+ * when the slot is 0.
+ *
+ * Returns 0 on success, -ENOENT when the search does not return 0, or the
+ * error returned by btrfs_del_item().
+ */
+static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
+                                  struct btrfs_root *root,
+                                  struct btrfs_path *path)
+{
+       struct btrfs_key victim;
+       int ret;
+
+       btrfs_item_key_to_cpu(path->nodes[0], &victim, path->slots[0]);
+       btrfs_release_path(path);
+
+       if (btrfs_search_slot(trans, root, &victim, path, -1, 1))
+               ret = -ENOENT;
+       else
+               ret = btrfs_del_item(trans, root, path);
+
+       if (ret) {
+               error("failed to delete root %llu item[%llu, %u, %llu]",
+                     root->objectid, victim.objectid, victim.type,
+                     victim.offset);
+               return ret;
+       }
+
+       /* Step back so that the position before the deleted item is current */
+       if (path->slots[0] != 0)
+               path->slots[0]--;
+       else
+               btrfs_prev_leaf(root, path);
+
+       printf("Deleted root %llu item[%llu, %u, %llu]\n",
+              root->objectid, victim.objectid, victim.type, victim.offset);
+       return 0;
+}
+
 /*
  * Main entry function to check known items and update related accounting info
  */
-static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
+static int check_leaf_items(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root, struct btrfs_path *path,
+                           struct node_refs *nrefs, int account_bytes)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_key key;
-       int slot = 0;
+       struct extent_buffer *eb;
+       int slot;
        int type;
        struct btrfs_extent_data_ref *dref;
-       int ret;
+       int ret = 0;
        int err = 0;
 
-next:
+again:
+       eb = path->nodes[0];
+       slot = path->slots[0];
+       if (slot >= btrfs_header_nritems(eb)) {
+               if (slot == 0) {
+                       error("empty leaf [%llu %u] root %llu", eb->start,
+                               root->fs_info->nodesize, root->objectid);
+                       err |= EIO;
+               }
+               goto out;
+       }
+
        btrfs_item_key_to_cpu(eb, &key, slot);
        type = key.type;
 
        switch (type) {
        case BTRFS_EXTENT_DATA_KEY:
-               ret = check_extent_data_item(root, eb, slot);
+               ret = check_extent_data_item(root, path, nrefs, account_bytes);
+               if (repair && ret)
+                       ret = repair_extent_data_item(trans, root, path, nrefs,
+                                                     ret);
                err |= ret;
                break;
        case BTRFS_BLOCK_GROUP_ITEM_KEY:
                ret = check_block_group_item(fs_info, eb, slot);
+               if (repair &&
+                   ret & REFERENCER_MISSING)
+                       ret = delete_extent_tree_item(trans, root, path);
                err |= ret;
                break;
        case BTRFS_DEV_ITEM_KEY:
@@ -12392,6 +13287,8 @@ next:
                break;
        case BTRFS_CHUNK_ITEM_KEY:
                ret = check_chunk_item(fs_info, eb, slot);
+               if (repair && ret)
+                       ret = repair_chunk_item(trans, root, path, ret);
                err |= ret;
                break;
        case BTRFS_DEV_EXTENT_KEY:
@@ -12400,15 +13297,19 @@ next:
                break;
        case BTRFS_EXTENT_ITEM_KEY:
        case BTRFS_METADATA_ITEM_KEY:
-               ret = check_extent_item(fs_info, eb, slot);
+               ret = check_extent_item(trans, fs_info, path);
                err |= ret;
                break;
        case BTRFS_EXTENT_CSUM_KEY:
                total_csum_bytes += btrfs_item_size_nr(eb, slot);
+               err |= ret;
                break;
        case BTRFS_TREE_BLOCK_REF_KEY:
                ret = check_tree_block_backref(fs_info, key.offset,
                                               key.objectid, -1);
+               if (repair &&
+                   ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
+                       ret = delete_extent_tree_item(trans, root, path);
                err |= ret;
                break;
        case BTRFS_EXTENT_DATA_REF_KEY:
@@ -12419,194 +13320,34 @@ next:
                                btrfs_extent_data_ref_offset(eb, dref),
                                key.objectid, 0,
                                btrfs_extent_data_ref_count(eb, dref));
+               if (repair &&
+                   ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
+                       ret = delete_extent_tree_item(trans, root, path);
                err |= ret;
                break;
        case BTRFS_SHARED_BLOCK_REF_KEY:
                ret = check_shared_block_backref(fs_info, key.offset,
                                                 key.objectid, -1);
+               if (repair &&
+                   ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
+                       ret = delete_extent_tree_item(trans, root, path);
                err |= ret;
                break;
        case BTRFS_SHARED_DATA_REF_KEY:
                ret = check_shared_data_backref(fs_info, key.offset,
                                                key.objectid);
+               if (repair &&
+                   ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
+                       ret = delete_extent_tree_item(trans, root, path);
                err |= ret;
                break;
        default:
                break;
        }
 
-       if (++slot < btrfs_header_nritems(eb))
-               goto next;
-
-       return err;
-}
-
-/*
- * Helper function for later fs/subvol tree check.  To determine if a tree
- * block should be checked.
- * This function will ensure only the direct referencer with lowest rootid to
- * check a fs/subvolume tree block.
- *
- * Backref check at extent tree would detect errors like missing subvolume
- * tree, so we can do aggressive check to reduce duplicated checks.
- */
-static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
-{
-       struct btrfs_root *extent_root = root->fs_info->extent_root;
-       struct btrfs_key key;
-       struct btrfs_path path;
-       struct extent_buffer *leaf;
-       int slot;
-       struct btrfs_extent_item *ei;
-       unsigned long ptr;
-       unsigned long end;
-       int type;
-       u32 item_size;
-       u64 offset;
-       struct btrfs_extent_inline_ref *iref;
-       int ret;
-
-       btrfs_init_path(&path);
-       key.objectid = btrfs_header_bytenr(eb);
-       key.type = BTRFS_METADATA_ITEM_KEY;
-       key.offset = (u64)-1;
-
-       /*
-        * Any failure in backref resolving means we can't determine
-        * whom the tree block belongs to.
-        * So in that case, we need to check that tree block
-        */
-       ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
-       if (ret < 0)
-               goto need_check;
-
-       ret = btrfs_previous_extent_item(extent_root, &path,
-                                        btrfs_header_bytenr(eb));
-       if (ret)
-               goto need_check;
-
-       leaf = path.nodes[0];
-       slot = path.slots[0];
-       btrfs_item_key_to_cpu(leaf, &key, slot);
-       ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
-
-       if (key.type == BTRFS_METADATA_ITEM_KEY) {
-               iref = (struct btrfs_extent_inline_ref *)(ei + 1);
-       } else {
-               struct btrfs_tree_block_info *info;
-
-               info = (struct btrfs_tree_block_info *)(ei + 1);
-               iref = (struct btrfs_extent_inline_ref *)(info + 1);
-       }
-
-       item_size = btrfs_item_size_nr(leaf, slot);
-       ptr = (unsigned long)iref;
-       end = (unsigned long)ei + item_size;
-       while (ptr < end) {
-               iref = (struct btrfs_extent_inline_ref *)ptr;
-               type = btrfs_extent_inline_ref_type(leaf, iref);
-               offset = btrfs_extent_inline_ref_offset(leaf, iref);
-
-               /*
-                * We only check the tree block if current root is
-                * the lowest referencer of it.
-                */
-               if (type == BTRFS_TREE_BLOCK_REF_KEY &&
-                   offset < root->objectid) {
-                       btrfs_release_path(&path);
-                       return 0;
-               }
-
-               ptr += btrfs_extent_inline_ref_size(type);
-       }
-       /*
-        * Normally we should also check keyed tree block ref, but that may be
-        * very time consuming.  Inlined ref should already make us skip a lot
-        * of refs now.  So skip search keyed tree block ref.
-        */
-
-need_check:
-       btrfs_release_path(&path);
-       return 1;
-}
-
-/*
- * Traversal function for tree block. We will do:
- * 1) Skip shared fs/subvolume tree blocks
- * 2) Update related bytes accounting
- * 3) Pre-order traversal
- */
-static int traverse_tree_block(struct btrfs_root *root,
-                               struct extent_buffer *node)
-{
-       struct extent_buffer *eb;
-       struct btrfs_key key;
-       struct btrfs_key drop_key;
-       int level;
-       u64 nr;
-       int i;
-       int err = 0;
-       int ret;
-
-       /*
-        * Skip shared fs/subvolume tree block, in that case they will
-        * be checked by referencer with lowest rootid
-        */
-       if (is_fstree(root->objectid) && !should_check(root, node))
-               return 0;
-
-       /* Update bytes accounting */
-       total_btree_bytes += node->len;
-       if (fs_root_objectid(btrfs_header_owner(node)))
-               total_fs_tree_bytes += node->len;
-       if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
-               total_extent_tree_bytes += node->len;
-
-       /* pre-order tranversal, check itself first */
-       level = btrfs_header_level(node);
-       ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
-                                  btrfs_header_level(node),
-                                  btrfs_header_owner(node));
-       err |= ret;
-       if (err)
-               error(
-       "check %s failed root %llu bytenr %llu level %d, force continue check",
-                       level ? "node":"leaf", root->objectid,
-                       btrfs_header_bytenr(node), btrfs_header_level(node));
-
-       if (!level) {
-               btree_space_waste += btrfs_leaf_free_space(root, node);
-               ret = check_leaf_items(root, node);
-               err |= ret;
-               return err;
-       }
-
-       nr = btrfs_header_nritems(node);
-       btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
-       btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
-               sizeof(struct btrfs_key_ptr);
-
-       /* Then check all its children */
-       for (i = 0; i < nr; i++) {
-               u64 blocknr = btrfs_node_blockptr(node, i);
-
-               btrfs_node_key_to_cpu(node, &key, i);
-               if (level == root->root_item.drop_level &&
-                   is_dropped_key(&key, &drop_key))
-                       continue;
-
-               /*
-                * As a btrfs tree has most 8 levels (0..7), so it's quite safe
-                * to call the function itself.
-                */
-               eb = read_tree_block(root->fs_info, blocknr, 0);
-               if (extent_buffer_uptodate(eb)) {
-                       ret = traverse_tree_block(root, eb);
-                       err |= ret;
-               }
-               free_extent_buffer(eb);
-       }
-
+       ++path->slots[0];
+       goto again;
+out:
        return err;
 }
 
@@ -12615,7 +13356,9 @@ static int traverse_tree_block(struct btrfs_root *root,
  */
 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
 {
+       struct btrfs_trans_handle *trans = NULL;
        struct btrfs_path path;
+       struct btrfs_key old_key;
        struct btrfs_key key;
        struct btrfs_root *root1;
        struct btrfs_root *root;
@@ -12625,12 +13368,20 @@ static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
 
        root = fs_info->fs_root;
 
+       if (repair) {
+               trans = btrfs_start_transaction(fs_info->extent_root, 1);
+               if (IS_ERR(trans)) {
+                       error("failed to start transaction before check");
+                       return PTR_ERR(trans);
+               }
+       }
+
        root1 = root->fs_info->chunk_root;
-       ret = traverse_tree_block(root1, root1->node);
+       ret = check_btrfs_root(trans, root1, 0, 1);
        err |= ret;
 
        root1 = root->fs_info->tree_root;
-       ret = traverse_tree_block(root1, root1->node);
+       ret = check_btrfs_root(trans, root1, 0, 1);
        err |= ret;
 
        btrfs_init_path(&path);
@@ -12640,7 +13391,7 @@ static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
 
        ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
        if (ret) {
-               error("cannot find extent treet in tree_root");
+               error("cannot find extent tree in tree_root");
                goto out;
        }
 
@@ -12648,6 +13399,7 @@ static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
                btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
                if (key.type != BTRFS_ROOT_ITEM_KEY)
                        goto next;
+               old_key = key;
                key.offset = (u64)-1;
 
                if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
@@ -12660,19 +13412,38 @@ static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
                        goto next;
                }
 
-               ret = traverse_tree_block(cur_root, cur_root->node);
+               ret = check_btrfs_root(trans, cur_root, 0, 1);
                err |= ret;
 
                if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
                        btrfs_free_fs_root(cur_root);
+
+               btrfs_release_path(&path);
+               ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
+                                       &old_key, &path, 0, 0);
+               if (ret)
+                       goto out;
 next:
                ret = btrfs_next_item(root1, &path);
                if (ret)
                        goto out;
        }
-
 out:
+
+       /* if repair, update block accounting */
+       if (repair) {
+               ret = btrfs_fix_block_accounting(trans, root);
+               if (ret)
+                       err |= ret;
+               else
+                       err &= ~BG_ACCOUNTING_ERROR;
+       }
+
+       if (trans)
+               btrfs_commit_transaction(trans, root->fs_info->extent_root);
+
        btrfs_release_path(&path);
+
        return err;
 }
 
@@ -12687,6 +13458,12 @@ static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
        else
                ret = check_chunks_and_extents(fs_info);
 
+       /* Also repair device size related problems */
+       if (repair && !ret) {
+               ret = btrfs_fix_device_and_super_size(fs_info);
+               if (ret > 0)
+                       ret = 0;
+       }
        return ret;
 }
 
@@ -14178,31 +14955,36 @@ int cmd_check(int argc, char **argv)
                goto close_out;
        }
 
+       if (!init_extent_tree) {
+               ret = repair_root_items(info);
+               if (ret < 0) {
+                       err = !!ret;
+                       error("failed to repair root items: %s", strerror(-ret));
+                       goto close_out;
+               }
+               if (repair) {
+                       fprintf(stderr, "Fixed %d roots.\n", ret);
+                       ret = 0;
+               } else if (ret > 0) {
+                       fprintf(stderr,
+                               "Found %d roots with an outdated root item.\n",
+                               ret);
+                       fprintf(stderr,
+       "Please run a filesystem check with the option --repair to fix them.\n");
+                       ret = 1;
+                       err |= ret;
+                       goto close_out;
+               }
+       }
+
        ret = do_check_chunks_and_extents(info);
        err |= !!ret;
        if (ret)
                error(
                "errors found in extent allocation tree or chunk allocation");
 
-       ret = repair_root_items(info);
-       err |= !!ret;
-       if (ret < 0) {
-               error("failed to repair root items: %s", strerror(-ret));
-               goto close_out;
-       }
-       if (repair) {
-               fprintf(stderr, "Fixed %d roots.\n", ret);
-               ret = 0;
-       } else if (ret > 0) {
-               fprintf(stderr,
-                      "Found %d roots with an outdated root item.\n",
-                      ret);
-               fprintf(stderr,
-                       "Please run a filesystem check with the option --repair to fix them.\n");
-               ret = 1;
-               err |= !!ret;
-               goto close_out;
-       }
+       /* Only re-check super size after we checked and repaired the fs */
+       err |= !is_super_size_valid(info);
 
        if (!ctx.progress_enabled) {
                if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))