btrfs-progs: add basic awareness of the free space tree
[platform/upstream/btrfs-progs.git] / extent-tree.c
index 3f54706..e04d962 100644 (file)
@@ -19,6 +19,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
+#include <math.h>
 #include "kerncompat.h"
 #include "radix-tree.h"
 #include "ctree.h"
@@ -28,7 +29,7 @@
 #include "crc32c.h"
 #include "volumes.h"
 #include "free-space-cache.h"
-#include "math.h"
+#include "utils.h"
 
 #define PENDING_EXTENT_INSERT 0
 #define PENDING_EXTENT_DELETE 1
@@ -57,6 +58,9 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct
                                 btrfs_root *extent_root);
 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
                               btrfs_root *extent_root);
+static struct btrfs_block_group_cache *
+btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache
+                      *hint, u64 search_start, int data, int owner);
 
 static int remove_sb_from_cache(struct btrfs_root *root,
                                struct btrfs_block_group_cache *cache)
@@ -314,10 +318,9 @@ static int block_group_state_bits(u64 flags)
        return bits;
 }
 
-struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
-                                                struct btrfs_block_group_cache
-                                                *hint, u64 search_start,
-                                                int data, int owner)
+static struct btrfs_block_group_cache *
+btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache
+                      *hint, u64 search_start, int data, int owner)
 {
        struct btrfs_block_group_cache *cache;
        struct extent_io_tree *block_group_cache;
@@ -560,7 +563,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
                        break;
                }
        }
-       btrfs_release_path(root, path);
+       btrfs_release_path(path);
 
        if (owner < BTRFS_FIRST_FREE_OBJECTID)
                new_size += sizeof(*bi);
@@ -667,7 +670,7 @@ again:
                        return 0;
 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
                key.type = BTRFS_EXTENT_REF_V0_KEY;
-               btrfs_release_path(root, path);
+               btrfs_release_path(path);
                ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
                if (ret < 0) {
                        err = ret;
@@ -705,7 +708,7 @@ again:
                if (match_extent_data_ref(leaf, ref, root_objectid,
                                          owner, offset)) {
                        if (recow) {
-                               btrfs_release_path(root, path);
+                               btrfs_release_path(path);
                                goto again;
                        }
                        err = 0;
@@ -766,7 +769,7 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
                        if (match_extent_data_ref(leaf, ref, root_objectid,
                                                  owner, offset))
                                break;
-                       btrfs_release_path(root, path);
+                       btrfs_release_path(path);
 
                        key.offset++;
                        ret = btrfs_insert_empty_item(trans, root, path, &key,
@@ -793,7 +796,7 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(leaf);
        ret = 0;
 fail:
-       btrfs_release_path(root, path);
+       btrfs_release_path(path);
        return ret;
 }
 
@@ -919,7 +922,7 @@ static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
                ret = -ENOENT;
 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
        if (ret == -ENOENT && parent) {
-               btrfs_release_path(root, path);
+               btrfs_release_path(path);
                key.type = BTRFS_EXTENT_REF_V0_KEY;
                ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
                if (ret > 0)
@@ -949,7 +952,7 @@ static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
 
        ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
 
-       btrfs_release_path(root, path);
+       btrfs_release_path(path);
        return ret;
 }
 
@@ -970,27 +973,6 @@ static inline int extent_ref_type(u64 parent, u64 owner)
        return type;
 }
 
-static int find_next_key(struct btrfs_path *path, struct btrfs_key *key)
-
-{
-       int level;
-       for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
-               if (!path->nodes[level])
-                       break;
-               if (path->slots[level] + 1 >=
-                   btrfs_header_nritems(path->nodes[level]))
-                       continue;
-               if (level == 0)
-                       btrfs_item_key_to_cpu(path->nodes[level], key,
-                                             path->slots[level] + 1);
-               else
-                       btrfs_node_key_to_cpu(path->nodes[level], key,
-                                             path->slots[level] + 1);
-               return 0;
-       }
-       return 1;
-}
-
 static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root,
                                 struct btrfs_path *path,
@@ -1059,6 +1041,7 @@ again:
                if (ret) {
                        key.type = BTRFS_EXTENT_ITEM_KEY;
                        key.offset = num_bytes;
+                       btrfs_release_path(path);
                        goto again;
                }
        }
@@ -1258,7 +1241,7 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
        if (ret != -ENOENT)
                return ret;
 
-       btrfs_release_path(root, path);
+       btrfs_release_path(path);
        *ref_ret = NULL;
 
        if (owner < BTRFS_FIRST_FREE_OBJECTID) {
@@ -1415,7 +1398,6 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                return -ENOMEM;
 
        path->reada = 1;
-       path->leave_spinning = 1;
 
        ret = insert_inline_extent_backref(trans, root->fs_info->extent_root,
                                           path, bytenr, num_bytes, parent,
@@ -1434,10 +1416,9 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
        btrfs_set_extent_refs(leaf, item, refs + 1);
 
        btrfs_mark_buffer_dirty(leaf);
-       btrfs_release_path(root->fs_info->extent_root, path);
+       btrfs_release_path(path);
 
        path->reada = 1;
-       path->leave_spinning = 1;
 
        /* now insert the actual backref */
        ret = insert_extent_backref(trans, root->fs_info->extent_root,
@@ -1482,6 +1463,8 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
        }
 
        path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
        path->reada = 1;
 
        key.objectid = bytenr;
@@ -1504,17 +1487,18 @@ again:
         * to make sure.
         */
        if (ret > 0 && metadata) {
-               if (path->slots) {
+               if (path->slots[0]) {
                        path->slots[0]--;
                        btrfs_item_key_to_cpu(path->nodes[0], &key,
                                              path->slots[0]);
                        if (key.objectid == bytenr &&
-                           key.type == BTRFS_METADATA_ITEM_KEY)
+                           key.type == BTRFS_EXTENT_ITEM_KEY &&
+                           key.offset == root->leafsize)
                                ret = 0;
                }
 
                if (ret) {
-                       btrfs_release_path(root, path);
+                       btrfs_release_path(path);
                        key.type = BTRFS_EXTENT_ITEM_KEY;
                        key.offset = root->leafsize;
                        metadata = 0;
@@ -1554,7 +1538,7 @@ again:
                *flags = extent_flags;
 out:
        btrfs_free_path(path);
-       return 0;
+       return ret;
 }
 
 int btrfs_set_block_flags(struct btrfs_trans_handle *trans,
@@ -1572,6 +1556,8 @@ int btrfs_set_block_flags(struct btrfs_trans_handle *trans,
                                  BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA);
 
        path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
        path->reada = 1;
 
        key.objectid = bytenr;
@@ -1591,7 +1577,7 @@ again:
 
        if (ret > 0 && skinny_metadata) {
                skinny_metadata = 0;
-               if (path->slots[0]--) {
+               if (path->slots[0]) {
                        path->slots[0]--;
                        btrfs_item_key_to_cpu(path->nodes[0], &key,
                                              path->slots[0]);
@@ -1601,7 +1587,7 @@ again:
                                ret = 0;
                }
                if (ret) {
-                       btrfs_release_path(root, path);
+                       btrfs_release_path(path);
                        key.offset = root->leafsize;
                        key.type = BTRFS_EXTENT_ITEM_KEY;
                        goto again;
@@ -1747,7 +1733,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
        bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
        write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
        btrfs_mark_buffer_dirty(leaf);
-       btrfs_release_path(extent_root, path);
+       btrfs_release_path(path);
 fail:
        finish_current_insert(trans, extent_root);
        pending_ret = del_pending_extents(trans, extent_root);
@@ -1803,11 +1789,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
                                                  u64 flags)
 {
-       struct list_head *head = &info->space_info;
-       struct list_head *cur;
        struct btrfs_space_info *found;
-       list_for_each(cur, head) {
-               found = list_entry(cur, struct btrfs_space_info, list);
+
+       flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
+
+       list_for_each_entry(found, &info->space_info, list) {
                if (found->flags & flags)
                        return found;
        }
@@ -1815,6 +1801,31 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
 
 }
 
+/*
+ * Subtract @total_bytes from the space_info matching @flags.
+ *
+ * Only an empty block group may be released, so a non-zero @bytes_used is
+ * rejected with -ENOTEMPTY.  Returns -ENOENT when no space_info matches
+ * @flags and -EINVAL when the space_info holds fewer than @total_bytes.
+ * On success returns 0 and, when @space_info is not NULL, stores the
+ * matching space_info through it.
+ */
+static int free_space_info(struct btrfs_fs_info *fs_info, u64 flags,
+                          u64 total_bytes, u64 bytes_used,
+                          struct btrfs_space_info **space_info)
+{
+       struct btrfs_space_info *sinfo;
+
+       /* freeing a block group that is still in use is not supported */
+       if (bytes_used != 0)
+               return -ENOTEMPTY;
+
+       sinfo = __find_space_info(fs_info, flags);
+       if (sinfo == NULL)
+               return -ENOENT;
+
+       if (total_bytes > sinfo->total_bytes) {
+               fprintf(stderr,
+                       "WARNING: bad space info to free %llu only have %llu\n",
+                       total_bytes, sinfo->total_bytes);
+               return -EINVAL;
+       }
+
+       sinfo->total_bytes -= total_bytes;
+       if (space_info != NULL)
+               *space_info = sinfo;
+       return 0;
+}
+
 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
                             u64 total_bytes, u64 bytes_used,
                             struct btrfs_space_info **space_info)
@@ -1839,7 +1850,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
                return -ENOMEM;
 
        list_add(&found->list, &info->space_info);
-       found->flags = flags;
+       found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
        found->total_bytes = total_bytes;
        found->bytes_used = bytes_used;
        found->bytes_pinned = 0;
@@ -2010,25 +2021,6 @@ next:
        return 0;
 }
 
-int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
-{
-       u64 last = 0;
-       u64 start;
-       u64 end;
-       struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
-       int ret;
-
-       while(1) {
-               ret = find_first_extent_bit(pinned_extents, last,
-                                           &start, &end, EXTENT_DIRTY);
-               if (ret)
-                       break;
-               set_extent_dirty(copy, start, end, GFP_NOFS);
-               last = end + 1;
-       }
-       return 0;
-}
-
 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root,
                               struct extent_io_tree *unpin)
@@ -2073,7 +2065,6 @@ static int finish_current_insert(struct btrfs_trans_handle *trans,
        u64 end;
        u64 priv;
        struct btrfs_fs_info *info = extent_root->fs_info;
-       struct btrfs_path *path;
        struct pending_extent_op *extent_op;
        struct btrfs_key key;
        int ret;
@@ -2081,8 +2072,6 @@ static int finish_current_insert(struct btrfs_trans_handle *trans,
                btrfs_fs_incompat(extent_root->fs_info,
                                  BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA);
 
-       path = btrfs_alloc_path();
-
        while(1) {
                ret = find_first_extent_bit(&info->extent_ins, 0, &start,
                                            &end, EXTENT_LOCKED);
@@ -2117,7 +2106,6 @@ static int finish_current_insert(struct btrfs_trans_handle *trans,
                                  GFP_NOFS);
                kfree(extent_op);
        }
-       btrfs_free_path(path);
        return 0;
 }
 
@@ -2210,7 +2198,6 @@ static int __free_extent(struct btrfs_trans_handle *trans,
                return -ENOMEM;
 
        path->reada = 1;
-       path->leave_spinning = 1;
 
        is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
        if (is_data)
@@ -2253,8 +2240,7 @@ static int __free_extent(struct btrfs_trans_handle *trans,
                                                    NULL, refs_to_drop,
                                                    is_data);
                        BUG_ON(ret);
-                       btrfs_release_path(extent_root, path);
-                       path->leave_spinning = 1;
+                       btrfs_release_path(path);
 
                        key.objectid = bytenr;
 
@@ -2281,7 +2267,7 @@ static int __free_extent(struct btrfs_trans_handle *trans,
 
                        if (ret > 0 && skinny_metadata) {
                                skinny_metadata = 0;
-                               btrfs_release_path(extent_root, path);
+                               btrfs_release_path(path);
                                key.type = BTRFS_EXTENT_ITEM_KEY;
                                key.offset = num_bytes;
                                ret = btrfs_search_slot(trans, extent_root,
@@ -2318,8 +2304,7 @@ static int __free_extent(struct btrfs_trans_handle *trans,
                                             owner_objectid, 0);
                BUG_ON(ret < 0);
 
-               btrfs_release_path(extent_root, path);
-               path->leave_spinning = 1;
+               btrfs_release_path(path);
 
                key.objectid = bytenr;
                key.type = BTRFS_EXTENT_ITEM_KEY;
@@ -2406,7 +2391,7 @@ static int __free_extent(struct btrfs_trans_handle *trans,
                ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
                                      num_to_del);
                BUG_ON(ret);
-               btrfs_release_path(extent_root, path);
+               btrfs_release_path(path);
 
                if (is_data) {
                        ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
@@ -2606,6 +2591,13 @@ check_failed:
                goto new_group;
        }
 
+       if (info->excluded_extents &&
+           test_range_bit(info->excluded_extents, ins->objectid,
+                          ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
+               search_start = ins->objectid + num_bytes;
+               goto new_group;
+       }
+
        if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
            ins->objectid < exclude_start + exclude_nr)) {
                search_start = exclude_start + exclude_nr;
@@ -2613,6 +2605,11 @@ check_failed:
        }
 
        if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
+               if (check_crossing_stripes(ins->objectid, num_bytes)) {
+                       search_start = round_down(ins->objectid + num_bytes,
+                                                 BTRFS_STRIPE_LEN);
+                       goto new_group;
+               }
                block_group = btrfs_lookup_block_group(info, ins->objectid);
                if (block_group)
                        trans->block_group = block_group;
@@ -2644,11 +2641,11 @@ error:
        return ret;
 }
 
-static int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
-                               struct btrfs_root *root,
-                               u64 num_bytes, u64 empty_size,
-                               u64 hint_byte, u64 search_end,
-                               struct btrfs_key *ins, int data)
+int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
+                        struct btrfs_root *root,
+                        u64 num_bytes, u64 empty_size,
+                        u64 hint_byte, u64 search_end,
+                        struct btrfs_key *ins, int data)
 {
        int ret;
        u64 search_start = 0;
@@ -2657,7 +2654,7 @@ static int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
 
        if (info->extent_ops) {
                struct btrfs_extent_ops *ops = info->extent_ops;
-               ret = ops->alloc_extent(root, num_bytes, hint_byte, ins);
+               ret = ops->alloc_extent(root, num_bytes, hint_byte, ins, !data);
                BUG_ON(ret);
                goto found;
        }
@@ -2726,7 +2723,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
        path = btrfs_alloc_path();
        BUG_ON(!path);
 
-       path->leave_spinning = 1;
        ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
                                      ins, size);
        BUG_ON(ret);
@@ -3003,6 +2999,13 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
                        next = read_tree_block(root, bytenr, blocksize,
                                               ptr_gen);
                        mutex_lock(&root->fs_info->fs_mutex);
+                       if (!extent_buffer_uptodate(next)) {
+                               if (IS_ERR(next))
+                                       ret = PTR_ERR(next);
+                               else
+                                       ret = -EIO;
+                               break;
+                       }
                }
                WARN_ON(*level <= 0);
                if (path->nodes[*level-1])
@@ -3105,8 +3108,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
                        break;
                ret = get_state_private(&info->block_group_cache, start, &ptr);
                if (!ret) {
-                       cache = (struct btrfs_block_group_cache *)
-                                       (uintptr_t)ptr;
+                       cache = u64_to_ptr(ptr);
                        if (cache->free_space_ctl) {
                                btrfs_remove_free_space_cache(cache);
                                kfree(cache->free_space_ctl);
@@ -3134,8 +3136,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
        return 0;
 }
 
-int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
-                          struct btrfs_key *key)
+static int find_first_block_group(struct btrfs_root *root,
+               struct btrfs_path *path, struct btrfs_key *key)
 {
        int ret;
        struct btrfs_key found_key;
@@ -3168,6 +3170,54 @@ error:
        return ret;
 }
 
+/*
+ * Add to cache->bytes_super the bytes of this block group that overlap
+ * superblock locations: the area below BTRFS_SUPER_INFO_OFFSET, plus every
+ * stripe btrfs_rmap_block() reports for each superblock mirror offset.
+ * Returns silently at the first btrfs_rmap_block() failure.
+ */
+static void account_super_bytes(struct btrfs_fs_info *fs_info,
+                               struct btrfs_block_group_cache *cache)
+{
+       u64 *logical;
+       int stripe_len;
+       int nr;
+       int mirror;
+
+       if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
+               stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
+               cache->bytes_super += stripe_len;
+       }
+
+       for (mirror = 0; mirror < BTRFS_SUPER_MIRROR_MAX; mirror++) {
+               u64 sb_bytenr = btrfs_sb_offset(mirror);
+
+               if (btrfs_rmap_block(&fs_info->mapping_tree,
+                                    cache->key.objectid, sb_bytenr,
+                                    0, &logical, &nr, &stripe_len))
+                       return;
+
+               while (nr--) {
+                       u64 start = logical[nr];
+                       u64 len;
+
+                       /*
+                        * Skip stripes that start past the end of the group
+                        * or that end at or before its start.
+                        */
+                       if (start > cache->key.objectid + cache->key.offset ||
+                           start + stripe_len <= cache->key.objectid)
+                               continue;
+
+                       if (start >= cache->key.objectid) {
+                               /* clamp the tail to the end of the group */
+                               len = min_t(u64, stripe_len,
+                                           cache->key.objectid +
+                                           cache->key.offset - start);
+                       } else {
+                               /* stripe begins before the group: drop its head */
+                               start = cache->key.objectid;
+                               len = (logical[nr] + stripe_len) - start;
+                       }
+
+                       cache->bytes_super += len;
+               }
+
+               kfree(logical);
+       }
+}
+
 int btrfs_read_block_groups(struct btrfs_root *root)
 {
        struct btrfs_path *path;
@@ -3205,7 +3255,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                cache = kzalloc(sizeof(*cache), GFP_NOFS);
                if (!cache) {
                        ret = -ENOMEM;
-                       break;
+                       goto error;
                }
 
                read_extent_buffer(leaf, &cache->item,
@@ -3215,7 +3265,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                cache->cached = 0;
                cache->pinned = 0;
                key.objectid = found_key.objectid + found_key.offset;
-               btrfs_release_path(root, path);
+               btrfs_release_path(path);
                cache->flags = btrfs_block_group_flags(&cache->item);
                bit = 0;
                if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
@@ -3229,6 +3279,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                if (btrfs_chunk_readonly(root, cache->key.objectid))
                        cache->ro = 1;
 
+               account_super_bytes(info, cache);
+
                ret = update_space_info(info, cache->flags, found_key.offset,
                                        btrfs_block_group_used(&cache->item),
                                        &space_info);
@@ -3270,6 +3322,7 @@ btrfs_add_block_group(struct btrfs_fs_info *fs_info, u64 bytes_used, u64 type,
        cache->flags = type;
        btrfs_set_block_group_flags(&cache->item, type);
 
+       account_super_bytes(fs_info, cache);
        ret = update_space_info(fs_info, cache->flags, size, bytes_used,
                                &cache->space_info);
        BUG_ON(ret);
@@ -3424,86 +3477,355 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
                                  alloc, mark_free);
 }
 
-static int btrfs_count_extents_in_block_group(struct btrfs_root *root,
-                                             struct btrfs_path *path, u64 start,
-                                             u64 len,
-                                             u64 *total)
+/*
+ * Delete the block group item keyed (@bytenr, BLOCK_GROUP_ITEM, @len) from
+ * the extent tree.
+ *
+ * The caller must make sure the block group is empty and all of its space is
+ * pinned, otherwise new tree blocks or data may be allocated into it.
+ *
+ * Returns 0 on success, -ENOMEM if no path could be allocated, -ENOENT if
+ * the item was not found, or the negative error from the search or delete.
+ */
+static int free_block_group_item(struct btrfs_trans_handle *trans,
+                                struct btrfs_fs_info *fs_info,
+                                u64 bytenr, u64 len)
 {
+       struct btrfs_root *extent_root = fs_info->extent_root;
+       struct btrfs_path *path;
+       struct btrfs_key key;
+       int ret;
+
+       path = btrfs_alloc_path();
+       if (path == NULL)
+               return -ENOMEM;
+
+       key.objectid = bytenr;
+       key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+       key.offset = len;
+
+       ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
+       if (ret == 0)
+               ret = btrfs_del_item(trans, extent_root, path);
+       else if (ret > 0)
+               ret = -ENOENT;  /* positive result: no item with this key */
+
+       btrfs_free_path(path);
+       return ret;
+}
+
+static int free_dev_extent_item(struct btrfs_trans_handle *trans,
+                               struct btrfs_fs_info *fs_info,
+                               u64 devid, u64 dev_offset)
+{
+       struct btrfs_root *root = fs_info->dev_root;
+       struct btrfs_path *path;
        struct btrfs_key key;
-       struct extent_buffer *leaf;
-       u64 bytes_used = 0;
        int ret;
-       int slot;
 
-       key.offset = 0;
-       key.objectid = start;
-       btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
-       ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
-                               &key, path, 0, 0);
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       key.objectid = devid;
+       key.type = BTRFS_DEV_EXTENT_KEY;
+       key.offset = dev_offset;
+
+       ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
        if (ret < 0)
-               return ret;
-       while(1) {
-               leaf = path->nodes[0];
-               slot = path->slots[0];
-               if (slot >= btrfs_header_nritems(leaf)) {
-                       ret = btrfs_next_leaf(root, path);
-                       if (ret < 0)
-                               return ret;
-                       if (ret > 0)
-                               break;
-                       leaf = path->nodes[0];
-                       slot = path->slots[0];
+               goto out;
+       if (ret > 0) {
+               ret = -ENOENT;
+               goto out;
+       }
+
+       ret = btrfs_del_item(trans, root, path);
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
+/*
+ * Look up the chunk item at @chunk_offset in the chunk tree and remove the
+ * dev extent item of each of its stripes through free_dev_extent_item().
+ *
+ * Returns 0 on success, -ENOMEM if no path could be allocated, -ENOENT if
+ * the chunk item is missing, or the first negative error encountered.
+ */
+static int free_chunk_dev_extent_items(struct btrfs_trans_handle *trans,
+                                      struct btrfs_fs_info *fs_info,
+                                      u64 chunk_offset)
+{
+       struct btrfs_root *chunk_root = fs_info->chunk_root;
+       struct extent_buffer *leaf;
+       struct btrfs_chunk *chunk;
+       struct btrfs_path *path;
+       struct btrfs_key key;
+       u16 num_stripes;
+       int ret;
+       int i;
+
+       path = btrfs_alloc_path();
+       if (path == NULL)
+               return -ENOMEM;
+
+       key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+       key.type = BTRFS_CHUNK_ITEM_KEY;
+       key.offset = chunk_offset;
+
+       ret = btrfs_search_slot(trans, chunk_root, &key, path, 0, 0);
+       if (ret > 0)
+               ret = -ENOENT;
+       if (ret < 0)
+               goto out;
+
+       leaf = path->nodes[0];
+       chunk = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_chunk);
+       num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+
+       /* one dev extent per stripe; stop at the first failure */
+       for (i = 0; i < num_stripes; i++) {
+               u64 devid = btrfs_stripe_devid_nr(leaf, chunk, i);
+               u64 dev_offset = btrfs_stripe_offset_nr(leaf, chunk, i);
+
+               ret = free_dev_extent_item(trans, fs_info, devid, dev_offset);
+               if (ret < 0)
+                       break;
+       }
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
+static int free_system_chunk_item(struct btrfs_super_block *super,
+                                 struct btrfs_key *key)
+{
+       struct btrfs_disk_key *disk_key;
+       struct btrfs_key cpu_key;
+       u32 array_size = btrfs_super_sys_array_size(super);
+       char *ptr = (char *)super->sys_chunk_array;
+       int cur = 0;
+       int ret = -ENOENT;
+
+       while (cur < btrfs_super_sys_array_size(super)) {
+               struct btrfs_chunk *chunk;
+               u32 num_stripes;
+               u32 chunk_len;
+
+               disk_key = (struct btrfs_disk_key *)(ptr + cur);
+               btrfs_disk_key_to_cpu(&cpu_key, disk_key);
+               if (cpu_key.type != BTRFS_CHUNK_ITEM_KEY) {
+                       /* non-chunk key in the sys array: give up */
+                       ret = -EIO;
+                       goto out;
                }
-               btrfs_item_key_to_cpu(leaf, &key, slot);
-               if (key.objectid > start + len)
-                       break;
-               if (key.type == BTRFS_EXTENT_ITEM_KEY)
-                       bytes_used += key.offset;
-               if (key.type == BTRFS_METADATA_ITEM_KEY)
-                       bytes_used += root->leafsize;
-               path->slots[0]++;
+
+               chunk = (struct btrfs_chunk *)(ptr + cur + sizeof(*disk_key));
+               num_stripes = btrfs_stack_chunk_num_stripes(chunk);
+               chunk_len = btrfs_chunk_item_size(num_stripes) +
+                           sizeof(*disk_key);
+
+               if (key->objectid == cpu_key.objectid &&
+                   key->offset == cpu_key.offset &&
+                   key->type == cpu_key.type) {
+                       memmove(ptr + cur, ptr + cur + chunk_len,
+                               array_size - cur - chunk_len);
+                       array_size -= chunk_len;
+                       btrfs_set_super_sys_array_size(super, array_size);
+                       ret = 0;
+                       goto out;
+               }
+
+               cur += chunk_len;
        }
-       *total = bytes_used;
-       btrfs_release_path(root, path);
-       return 0;
+out:
+       return ret;
 }
 
-int btrfs_check_block_accounting(struct btrfs_root *root)
+static int free_chunk_item(struct btrfs_trans_handle *trans,
+                          struct btrfs_fs_info *fs_info,
+                          u64 bytenr, u64 len)
 {
+       struct btrfs_path *path;
+       struct btrfs_key key;
+       struct btrfs_root *root = fs_info->chunk_root;
+       struct btrfs_chunk *chunk;
+       u64 chunk_type;
        int ret;
-       u64 start = 0;
-       u64 bytes_used = 0;
-       struct btrfs_path path;
+
+       key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+       key.offset = bytenr;
+       key.type = BTRFS_CHUNK_ITEM_KEY;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+       if (ret > 0) {
+               ret = -ENOENT;
+               goto out;
+       }
+       if (ret < 0)
+               goto out;
+       chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                              struct btrfs_chunk);
+       chunk_type = btrfs_chunk_type(path->nodes[0], chunk);
+
+       ret = btrfs_del_item(trans, root, path);
+       if (ret < 0)
+               goto out;
+
+       if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
+               ret = free_system_chunk_item(fs_info->super_copy, &key);
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
+/*
+ * Compute the per-device extent length of the chunk described by @map:
+ * the chunk size (map->ce.size) divided by a profile-dependent divisor
+ * derived from the stripe counts in @map.
+ *
+ * An unrecognised profile triggers BUG_ON().
+ */
+static u64 get_dev_extent_len(struct map_lookup *map)
+{
+       int div;
+
+       switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+       case 0: /* Single */
+       case BTRFS_BLOCK_GROUP_DUP:
+       case BTRFS_BLOCK_GROUP_RAID1:
+               div = 1;
+               break;
+       case BTRFS_BLOCK_GROUP_RAID0:
+               /* RAID0 spreads the chunk evenly over all of its stripes */
+               div = map->num_stripes;
+               break;
+       case BTRFS_BLOCK_GROUP_RAID5:
+               div = (map->num_stripes - 1);
+               break;
+       case BTRFS_BLOCK_GROUP_RAID6:
+               div = (map->num_stripes - 2);
+               break;
+       case BTRFS_BLOCK_GROUP_RAID10:
+               div = (map->num_stripes / map->sub_stripes);
+               break;
+       default:
+               /* normally the chunk sanity check at read time handles this */
+               BUG_ON(1);
+       }
+       return map->ce.size / div;
+}
+
+/*
+ * Free the in-memory block group and chunk caches of a removed block group.
+ *
+ * Looks up the block group cache at @bytenr, releases its free space ctl (if
+ * any), clears every bit for the range in fs_info->block_group_cache, calls
+ * free_space_info() with the block group's flags and @len and frees the
+ * cache.  Then the chunk mapping starting exactly at @bytenr is looked up,
+ * its device extent length is subtracted from bytes_used of every stripe
+ * device (each one written back with btrfs_update_device()), and the mapping
+ * is removed and freed.
+ *
+ * Returns -ENOENT if no block group cache or no mapping starting at @bytenr
+ * exists, a negative value if free_space_info() or btrfs_update_device()
+ * fails, otherwise the result of the last call made.
+ */
+static int free_block_group_cache(struct btrfs_trans_handle *trans,
+                                 struct btrfs_fs_info *fs_info,
+                                 u64 bytenr, u64 len)
+{
        struct btrfs_block_group_cache *cache;
-       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct cache_extent *ce;
+       struct map_lookup *map;
+       int ret;
+       int i;
+       u64 flags;
 
-       btrfs_init_path(&path);
+       /* Free block group cache first */
+       cache = btrfs_lookup_block_group(fs_info, bytenr);
+       if (!cache)
+               return -ENOENT;
+       /* Saved because the cache is freed before the flags stop being needed */
+       flags = cache->flags;
+       if (cache->free_space_ctl) {
+               btrfs_remove_free_space_cache(cache);
+               kfree(cache->free_space_ctl);
+       }
+       /*
+        * NOTE(review): the end passed here is bytenr + len.  If
+        * clear_extent_bits() takes an inclusive end this should probably be
+        * bytenr + len - 1 -- confirm against the extent_io helpers.
+        */
+       clear_extent_bits(&fs_info->block_group_cache, bytenr, bytenr + len,
+                         (unsigned int)-1, GFP_NOFS);
+       ret = free_space_info(fs_info, flags, len, 0, NULL);
+       /*
+        * Nothing below uses the cache any more, so free it before looking at
+        * ret; otherwise the error path would leak it.
+        */
+       kfree(cache);
+       if (ret < 0)
+               goto out;
 
-       while(1) {
-               cache = btrfs_lookup_block_group(fs_info, start);
-               if (!cache)
-                       break;
+       /* Then free mapping info and dev usage info */
+       ce = search_cache_extent(&fs_info->mapping_tree.cache_tree, bytenr);
+       if (!ce || ce->start != bytenr) {
+               ret = -ENOENT;
+               goto out;
+       }
+       map = container_of(ce, struct map_lookup, ce);
+       for (i = 0; i < map->num_stripes; i++) {
+               struct btrfs_device *device;
+
+               device = map->stripes[i].dev;
+               device->bytes_used -= get_dev_extent_len(map);
+               ret = btrfs_update_device(trans, device);
+               if (ret < 0)
+                       goto out;
+       }
+       remove_cache_extent(&fs_info->mapping_tree.cache_tree, ce);
+       free(map);
+out:
+       return ret;
+}
 
-               ret = btrfs_count_extents_in_block_group(root, &path,
-                                                        cache->key.objectid,
-                                                        cache->key.offset,
-                                                        &bytes_used);
+/*
+ * Remove the empty block group covering [@bytenr, @bytenr + @len).
+ *
+ * The block group item is looked up in the extent tree first; a block group
+ * whose on-disk used counter is not zero is refused.  The range is then
+ * pinned while the block group item, the dev extent items, the chunk item
+ * and finally the in-memory caches are released, and it is unpinned again
+ * before returning, on success as well as on failure.
+ *
+ * Returns -ENOMEM if no path can be allocated, -ENOENT if the block group
+ * item does not exist, -EINVAL if the block group is not empty, the negative
+ * error of the first removal step that fails, otherwise the return value of
+ * free_block_group_cache().
+ */
+int btrfs_free_block_group(struct btrfs_trans_handle *trans,
+                          struct btrfs_fs_info *fs_info, u64 bytenr, u64 len)
+{
+       struct btrfs_root *extent_root = fs_info->extent_root;
+       struct btrfs_path *path;
+       struct btrfs_block_group_item *bgi;
+       struct btrfs_key key;
+       int ret = 0;
 
-               if (ret == 0) {
-                       u64 on_disk = btrfs_block_group_used(&cache->item);
-                       if (on_disk != bytes_used) {
-                               fprintf(stderr, "bad block group accounting found %llu "
-                                       "expected %llu block group %llu\n",
-                                       (unsigned long long)bytes_used,
-                                       (unsigned long long)on_disk,
-                                       (unsigned long long)cache->key.objectid);
-                       }
-               }
-               start = cache->key.objectid + cache->key.offset;
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
 
-               cache->space_info->bytes_used = 0;
+       key.objectid = bytenr;
+       key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+       key.offset = len;
+
+       /* Double check the block group to ensure it's empty */
+       ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
+       if (ret > 0) {
+               /* No exact match: the block group item does not exist */
+               ret = -ENOENT;
+               goto out;
        }
-       return 0;
+       if (ret < 0)
+               goto out;
+
+       bgi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                            struct btrfs_block_group_item);
+       if (btrfs_disk_block_group_used(path->nodes[0], bgi)) {
+               fprintf(stderr,
+                       "WARNING: block group [%llu,%llu) is not empty\n",
+                       bytenr, bytenr + len);
+               ret = -EINVAL;
+               goto out;
+       }
+       btrfs_release_path(path);
+
+       /*
+        * Pin all space in the block group so that nothing done in this
+        * transaction can allocate from it.  Every step below that needs the
+        * transaction runs between this pin and the matching unpin.
+        */
+       btrfs_pin_extent(fs_info, bytenr, len);
+
+       /* delete block group item and chunk item */
+       ret = free_block_group_item(trans, fs_info, bytenr, len);
+       if (ret < 0) {
+               fprintf(stderr,
+                       "failed to free block group item for [%llu,%llu)\n",
+                       bytenr, bytenr + len);
+               btrfs_unpin_extent(fs_info, bytenr, len);
+               goto out;
+       }
+
+       ret = free_chunk_dev_extent_items(trans, fs_info, bytenr);
+       if (ret < 0) {
+               fprintf(stderr,
+                       "failed to free dev extents belonging to [%llu,%llu)\n",
+                       bytenr, bytenr + len);
+               btrfs_unpin_extent(fs_info, bytenr, len);
+               goto out;
+       }
+       ret = free_chunk_item(trans, fs_info, bytenr, len);
+       if (ret < 0) {
+               fprintf(stderr,
+                       "failed to free chunk for [%llu,%llu)\n",
+                       bytenr, bytenr + len);
+               btrfs_unpin_extent(fs_info, bytenr, len);
+               goto out;
+       }
+
+       /* Now release the block_group_cache */
+       ret = free_block_group_cache(trans, fs_info, bytenr, len);
+       btrfs_unpin_extent(fs_info, bytenr, len);
+
+out:
+       btrfs_free_path(path);
+       return ret;
 }
 
 /*
@@ -3582,6 +3904,100 @@ int btrfs_fix_block_accounting(struct btrfs_trans_handle *trans,
                path.slots[0]++;
        }
        btrfs_set_super_bytes_used(root->fs_info->super_copy, bytes_used);
-       btrfs_release_path(root, &path);
+       btrfs_release_path(&path);
        return 0;
 }
+
+/*
+ * Attach the on-disk data extent [@disk_bytenr, @disk_bytenr + @num_bytes)
+ * to inode @objectid of @root at file offset @file_pos.
+ *
+ * A zero @disk_bytenr is handed straight to btrfs_insert_file_extent().
+ * Otherwise a regular file extent item without compression, encryption or
+ * other encoding is inserted, nbytes of @inode grows by @num_bytes, an
+ * extent item is created in the extent tree (followed by a block group
+ * update) unless one already exists, and an extent reference for @root is
+ * added.
+ *
+ * Returns 0 on success or the error of the step that failed.
+ */
+int btrfs_record_file_extent(struct btrfs_trans_handle *trans,
+                             struct btrfs_root *root, u64 objectid,
+                             struct btrfs_inode_item *inode,
+                             u64 file_pos, u64 disk_bytenr,
+                             u64 num_bytes)
+{
+       struct btrfs_root *extent_root = root->fs_info->extent_root;
+       struct btrfs_file_extent_item *file_item;
+       struct btrfs_extent_item *extent_item;
+       struct extent_buffer *eb;
+       struct btrfs_path path;
+       struct btrfs_key key;
+       int ret;
+
+       /* No disk extent to reference: only the file extent item is needed */
+       if (disk_bytenr == 0)
+               return btrfs_insert_file_extent(trans, root, objectid,
+                                               file_pos, disk_bytenr,
+                                               num_bytes, num_bytes);
+
+       btrfs_init_path(&path);
+
+       /* First the file extent item in @root */
+       key.objectid = objectid;
+       key.offset = file_pos;
+       btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
+       ret = btrfs_insert_empty_item(trans, root, &path, &key,
+                                     sizeof(*file_item));
+       if (ret)
+               goto fail;
+
+       eb = path.nodes[0];
+       file_item = btrfs_item_ptr(eb, path.slots[0],
+                                  struct btrfs_file_extent_item);
+       btrfs_set_file_extent_generation(eb, file_item, trans->transid);
+       btrfs_set_file_extent_type(eb, file_item, BTRFS_FILE_EXTENT_REG);
+       btrfs_set_file_extent_disk_bytenr(eb, file_item, disk_bytenr);
+       btrfs_set_file_extent_disk_num_bytes(eb, file_item, num_bytes);
+       btrfs_set_file_extent_offset(eb, file_item, 0);
+       btrfs_set_file_extent_num_bytes(eb, file_item, num_bytes);
+       btrfs_set_file_extent_ram_bytes(eb, file_item, num_bytes);
+       btrfs_set_file_extent_compression(eb, file_item, 0);
+       btrfs_set_file_extent_encryption(eb, file_item, 0);
+       btrfs_set_file_extent_other_encoding(eb, file_item, 0);
+       btrfs_mark_buffer_dirty(eb);
+
+       /* Account the new bytes in the caller's in-memory inode item */
+       btrfs_set_stack_inode_nbytes(inode,
+                       btrfs_stack_inode_nbytes(inode) + num_bytes);
+
+       btrfs_release_path(&path);
+
+       /* Then the extent item; an already existing one is reused as is */
+       key.objectid = disk_bytenr;
+       key.offset = num_bytes;
+       key.type = BTRFS_EXTENT_ITEM_KEY;
+
+       ret = btrfs_insert_empty_item(trans, extent_root, &path,
+                                     &key, sizeof(*extent_item));
+       if (ret && ret != -EEXIST)
+               goto fail;
+       if (!ret) {
+               eb = path.nodes[0];
+               extent_item = btrfs_item_ptr(eb, path.slots[0],
+                                            struct btrfs_extent_item);
+
+               /* Starts at zero refs; the reference is added further down */
+               btrfs_set_extent_refs(eb, extent_item, 0);
+               btrfs_set_extent_generation(eb, extent_item, 0);
+               btrfs_set_extent_flags(eb, extent_item,
+                                      BTRFS_EXTENT_FLAG_DATA);
+
+               btrfs_mark_buffer_dirty(eb);
+
+               ret = btrfs_update_block_group(trans, root, disk_bytenr,
+                                              num_bytes, 1, 0);
+               if (ret)
+                       goto fail;
+       }
+       btrfs_extent_post_op(trans, extent_root);
+
+       /* Finally the reference; its result is the function's result */
+       ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, 0,
+                                  root->root_key.objectid,
+                                  objectid, file_pos);
+fail:
+       btrfs_release_path(&path);
+       return ret;
+}