btrfs-progs: add basic awareness of the free space tree
[platform/upstream/btrfs-progs.git] / extent-tree.c
index c24af6a..e04d962 100644 (file)
@@ -19,6 +19,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
+#include <math.h>
 #include "kerncompat.h"
 #include "radix-tree.h"
 #include "ctree.h"
@@ -28,7 +29,6 @@
 #include "crc32c.h"
 #include "volumes.h"
 #include "free-space-cache.h"
-#include "math.h"
 #include "utils.h"
 
 #define PENDING_EXTENT_INSERT 0
@@ -1801,6 +1801,31 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
 
 }
 
+static int free_space_info(struct btrfs_fs_info *fs_info, u64 flags,
+                          u64 total_bytes, u64 bytes_used,
+                          struct btrfs_space_info **space_info)
+{
+       struct btrfs_space_info *found;
+
+       /* only support free block group which is empty */
+       if (bytes_used)
+               return -ENOTEMPTY;
+
+       found = __find_space_info(fs_info, flags);
+       if (!found)
+               return -ENOENT;
+       if (found->total_bytes < total_bytes) {
+               fprintf(stderr,
+                       "WARNING: bad space info to free %llu only have %llu\n",
+                       total_bytes, found->total_bytes);
+               return -EINVAL;
+       }
+       found->total_bytes -= total_bytes;
+       if (space_info)
+               *space_info = found;
+       return 0;
+}
+
 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
                             u64 total_bytes, u64 bytes_used,
                             struct btrfs_space_info **space_info)
@@ -2580,6 +2605,11 @@ check_failed:
        }
 
        if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
+               if (check_crossing_stripes(ins->objectid, num_bytes)) {
+                       search_start = round_down(ins->objectid + num_bytes,
+                                                 BTRFS_STRIPE_LEN);
+                       goto new_group;
+               }
                block_group = btrfs_lookup_block_group(info, ins->objectid);
                if (block_group)
                        trans->block_group = block_group;
@@ -2624,7 +2654,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
 
        if (info->extent_ops) {
                struct btrfs_extent_ops *ops = info->extent_ops;
-               ret = ops->alloc_extent(root, num_bytes, hint_byte, ins);
+               ret = ops->alloc_extent(root, num_bytes, hint_byte, ins, !data);
                BUG_ON(ret);
                goto found;
        }
@@ -3225,7 +3255,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                cache = kzalloc(sizeof(*cache), GFP_NOFS);
                if (!cache) {
                        ret = -ENOMEM;
-                       break;
+                       goto error;
                }
 
                read_extent_buffer(leaf, &cache->item,
@@ -3448,6 +3478,357 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
 }
 
 /*
+ * Just remove a block group item in extent tree
+ * Caller should ensure the block group is empty and all space is pinned.
+ * Or new tree block/data may be allocated into it.
+ */
+static int free_block_group_item(struct btrfs_trans_handle *trans,
+                                struct btrfs_fs_info *fs_info,
+                                u64 bytenr, u64 len)
+{
+       struct btrfs_path *path;
+       struct btrfs_key key;
+       struct btrfs_root *root = fs_info->extent_root;
+       int ret = 0;
+
+       key.objectid = bytenr;
+       key.offset = len;
+       key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+       if (ret > 0) {
+               ret = -ENOENT;
+               goto out;
+       }
+       if (ret < 0)
+               goto out;
+
+       ret = btrfs_del_item(trans, root, path);
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
+static int free_dev_extent_item(struct btrfs_trans_handle *trans,
+                               struct btrfs_fs_info *fs_info,
+                               u64 devid, u64 dev_offset)
+{
+       struct btrfs_root *root = fs_info->dev_root;
+       struct btrfs_path *path;
+       struct btrfs_key key;
+       int ret;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       key.objectid = devid;
+       key.type = BTRFS_DEV_EXTENT_KEY;
+       key.offset = dev_offset;
+
+       ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+       if (ret < 0)
+               goto out;
+       if (ret > 0) {
+               ret = -ENOENT;
+               goto out;
+       }
+
+       ret = btrfs_del_item(trans, root, path);
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
+static int free_chunk_dev_extent_items(struct btrfs_trans_handle *trans,
+                                      struct btrfs_fs_info *fs_info,
+                                      u64 chunk_offset)
+{
+       struct btrfs_chunk *chunk = NULL;
+       struct btrfs_root *root= fs_info->chunk_root;
+       struct btrfs_path *path;
+       struct btrfs_key key;
+       u16 num_stripes;
+       int i;
+       int ret;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+       key.type = BTRFS_CHUNK_ITEM_KEY;
+       key.offset = chunk_offset;
+
+       ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
+       if (ret < 0)
+               goto out;
+       if (ret > 0) {
+               ret = -ENOENT;
+               goto out;
+       }
+       chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                              struct btrfs_chunk);
+       num_stripes = btrfs_chunk_num_stripes(path->nodes[0], chunk);
+       for (i = 0; i < num_stripes; i++) {
+               ret = free_dev_extent_item(trans, fs_info,
+                       btrfs_stripe_devid_nr(path->nodes[0], chunk, i),
+                       btrfs_stripe_offset_nr(path->nodes[0], chunk, i));
+               if (ret < 0)
+                       goto out;
+       }
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
+static int free_system_chunk_item(struct btrfs_super_block *super,
+                                 struct btrfs_key *key)
+{
+       struct btrfs_disk_key *disk_key;
+       struct btrfs_key cpu_key;
+       u32 array_size = btrfs_super_sys_array_size(super);
+       char *ptr = (char *)super->sys_chunk_array;
+       int cur = 0;
+       int ret = -ENOENT;
+
+       while (cur < btrfs_super_sys_array_size(super)) {
+               struct btrfs_chunk *chunk;
+               u32 num_stripes;
+               u32 chunk_len;
+
+               disk_key = (struct btrfs_disk_key *)(ptr + cur);
+               btrfs_disk_key_to_cpu(&cpu_key, disk_key);
+               if (cpu_key.type != BTRFS_CHUNK_ITEM_KEY) {
+                       /* just in case */
+                       ret = -EIO;
+                       goto out;
+               }
+
+               chunk = (struct btrfs_chunk *)(ptr + cur + sizeof(*disk_key));
+               num_stripes = btrfs_stack_chunk_num_stripes(chunk);
+               chunk_len = btrfs_chunk_item_size(num_stripes) +
+                           sizeof(*disk_key);
+
+               if (key->objectid == cpu_key.objectid &&
+                   key->offset == cpu_key.offset &&
+                   key->type == cpu_key.type) {
+                       memmove(ptr + cur, ptr + cur + chunk_len,
+                               array_size - cur - chunk_len);
+                       array_size -= chunk_len;
+                       btrfs_set_super_sys_array_size(super, array_size);
+                       ret = 0;
+                       goto out;
+               }
+
+               cur += chunk_len;
+       }
+out:
+       return ret;
+}
+
+static int free_chunk_item(struct btrfs_trans_handle *trans,
+                          struct btrfs_fs_info *fs_info,
+                          u64 bytenr, u64 len)
+{
+       struct btrfs_path *path;
+       struct btrfs_key key;
+       struct btrfs_root *root = fs_info->chunk_root;
+       struct btrfs_chunk *chunk;
+       u64 chunk_type;
+       int ret;
+
+       key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+       key.offset = bytenr;
+       key.type = BTRFS_CHUNK_ITEM_KEY;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+       if (ret > 0) {
+               ret = -ENOENT;
+               goto out;
+       }
+       if (ret < 0)
+               goto out;
+       chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                              struct btrfs_chunk);
+       chunk_type = btrfs_chunk_type(path->nodes[0], chunk);
+
+       ret = btrfs_del_item(trans, root, path);
+       if (ret < 0)
+               goto out;
+
+       if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
+               ret = free_system_chunk_item(fs_info->super_copy, &key);
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
+static u64 get_dev_extent_len(struct map_lookup *map)
+{
+       int div;
+
+       switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+       case 0: /* Single */
+       case BTRFS_BLOCK_GROUP_DUP:
+       case BTRFS_BLOCK_GROUP_RAID1:
+               div = 1;
+               break;
+       case BTRFS_BLOCK_GROUP_RAID5:
+               div = (map->num_stripes - 1);
+               break;
+       case BTRFS_BLOCK_GROUP_RAID6:
+               div = (map->num_stripes - 2);
+               break;
+       case BTRFS_BLOCK_GROUP_RAID10:
+               div = (map->num_stripes / map->sub_stripes);
+               break;
+       default:
+               /* normally, read chunk security hook should handled it */
+               BUG_ON(1);
+       }
+       return map->ce.size / div;
+}
+
+/* free block group/chunk related caches */
+static int free_block_group_cache(struct btrfs_trans_handle *trans,
+                                 struct btrfs_fs_info *fs_info,
+                                 u64 bytenr, u64 len)
+{
+       struct btrfs_block_group_cache *cache;
+       struct cache_extent *ce;
+       struct map_lookup *map;
+       int ret;
+       int i;
+       u64 flags;
+
+       /* Free block group cache first */
+       cache = btrfs_lookup_block_group(fs_info, bytenr);
+       if (!cache)
+               return -ENOENT;
+       flags = cache->flags;
+       if (cache->free_space_ctl) {
+               btrfs_remove_free_space_cache(cache);
+               kfree(cache->free_space_ctl);
+       }
+       clear_extent_bits(&fs_info->block_group_cache, bytenr, bytenr + len,
+                         (unsigned int)-1, GFP_NOFS);
+       ret = free_space_info(fs_info, flags, len, 0, NULL);
+       if (ret < 0)
+               goto out;
+       kfree(cache);
+
+       /* Then free mapping info and dev usage info */
+       ce = search_cache_extent(&fs_info->mapping_tree.cache_tree, bytenr);
+       if (!ce || ce->start != bytenr) {
+               ret = -ENOENT;
+               goto out;
+       }
+       map = container_of(ce, struct map_lookup, ce);
+       for (i = 0; i < map->num_stripes; i++) {
+               struct btrfs_device *device;
+
+               device = map->stripes[i].dev;
+               device->bytes_used -= get_dev_extent_len(map);
+               ret = btrfs_update_device(trans, device);
+               if (ret < 0)
+                       goto out;
+       }
+       remove_cache_extent(&fs_info->mapping_tree.cache_tree, ce);
+       free(map);
+out:
+       return ret;
+}
+
+int btrfs_free_block_group(struct btrfs_trans_handle *trans,
+                          struct btrfs_fs_info *fs_info, u64 bytenr, u64 len)
+{
+       struct btrfs_root *extent_root = fs_info->extent_root;
+       struct btrfs_path *path;
+       struct btrfs_block_group_item *bgi;
+       struct btrfs_key key;
+       int ret = 0;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       key.objectid = bytenr;
+       key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+       key.offset = len;
+
+       /* Double check the block group to ensure it's empty */
+       ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
+       if (ret > 0) {
+               ret = -ENONET;
+               goto out;
+       }
+       if (ret < 0)
+               goto out;
+
+       bgi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                            struct btrfs_block_group_item);
+       if (btrfs_disk_block_group_used(path->nodes[0], bgi)) {
+               fprintf(stderr,
+                       "WARNING: block group [%llu,%llu) is not empty\n",
+                       bytenr, bytenr + len);
+               ret = -EINVAL;
+               goto out;
+       }
+       btrfs_release_path(path);
+
+       /*
+        * Now pin all space in the block group, to prevent further transaction
+        * allocate space from it.
+        * Every operation needs a transaction must be in the range.
+        */
+       btrfs_pin_extent(fs_info, bytenr, len);
+
+       /* delete block group item and chunk item */
+       ret = free_block_group_item(trans, fs_info, bytenr, len);
+       if (ret < 0) {
+               fprintf(stderr,
+                       "failed to free block group item for [%llu,%llu)\n",
+                       bytenr, bytenr + len);
+               btrfs_unpin_extent(fs_info, bytenr, len);
+               goto out;
+       }
+
+       ret = free_chunk_dev_extent_items(trans, fs_info, bytenr);
+       if (ret < 0) {
+               fprintf(stderr,
+                       "failed to dev extents belongs to [%llu,%llu)\n",
+                       bytenr, bytenr + len);
+               btrfs_unpin_extent(fs_info, bytenr, len);
+               goto out;
+       }
+       ret = free_chunk_item(trans, fs_info, bytenr, len);
+       if (ret < 0) {
+               fprintf(stderr,
+                       "failed to free chunk for [%llu,%llu)\n",
+                       bytenr, bytenr + len);
+               btrfs_unpin_extent(fs_info, bytenr, len);
+               goto out;
+       }
+
+       /* Now release the block_group_cache */
+       ret = free_block_group_cache(trans, fs_info, bytenr, len);
+       btrfs_unpin_extent(fs_info, bytenr, len);
+
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
+/*
  * Fixup block accounting. The initial block accounting created by
  * make_block_groups isn't accuracy in this case.
  */