X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=extent-tree.c;h=e2ae74a7fe66bfbecb64bf27bf7d6994666135fa;hb=f44a0550123be92245943d832df64134c5fd6241;hp=ee87f1f12c738168a5672b045f6b50569ccc0ff5;hpb=62b79931546bca1b95a4b23ecec8378c372b26f7;p=platform%2Fupstream%2Fbtrfs-progs.git diff --git a/extent-tree.c b/extent-tree.c index ee87f1f..e2ae74a 100644 --- a/extent-tree.c +++ b/extent-tree.c @@ -18,6 +18,8 @@ #include #include +#include +#include #include "kerncompat.h" #include "radix-tree.h" #include "ctree.h" @@ -26,12 +28,8 @@ #include "transaction.h" #include "crc32c.h" #include "volumes.h" - -#define BLOCK_GROUP_DATA EXTENT_WRITEBACK -#define BLOCK_GROUP_METADATA EXTENT_UPTODATE -#define BLOCK_GROUP_SYSTEM EXTENT_NEW - -#define BLOCK_GROUP_DIRTY EXTENT_DIRTY +#include "free-space-cache.h" +#include "utils.h" #define PENDING_EXTENT_INSERT 0 #define PENDING_EXTENT_DELETE 1 @@ -60,6 +58,9 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); +static struct btrfs_block_group_cache * +btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache + *hint, u64 search_start, int data, int owner); static int remove_sb_from_cache(struct btrfs_root *root, struct btrfs_block_group_cache *cache) @@ -68,18 +69,18 @@ static int remove_sb_from_cache(struct btrfs_root *root, u64 *logical; int stripe_len; int i, nr, ret; + struct btrfs_fs_info *fs_info = root->fs_info; struct extent_io_tree *free_space_cache; - free_space_cache = &root->fs_info->free_space_cache; + free_space_cache = &fs_info->free_space_cache; for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { bytenr = btrfs_sb_offset(i); - ret = btrfs_rmap_block(&root->fs_info->mapping_tree, - cache->key.objectid, bytenr, 0, + ret = btrfs_rmap_block(fs_info, cache->key.objectid, bytenr, 0, &logical, &nr, &stripe_len); BUG_ON(ret); while (nr--) { clear_extent_dirty(free_space_cache, logical[nr], - logical[nr] + stripe_len - 1, GFP_NOFS); + logical[nr] + stripe_len - 1); } kfree(logical); } @@ -115,7 +116,8 @@ static int cache_block_group(struct btrfs_root *root, last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); key.objectid = last; key.offset = 0; - btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + key.type = 0; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto err; @@ -142,14 +144,17 @@ static int cache_block_group(struct btrfs_root *root, break; } - if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { + if (key.type == BTRFS_EXTENT_ITEM_KEY || + key.type == BTRFS_METADATA_ITEM_KEY) { if (key.objectid > last) { hole_size = key.objectid - last; set_extent_dirty(free_space_cache, last, - last + hole_size - 1, - GFP_NOFS); + last + hole_size - 1); } - last = key.objectid + key.offset; + if (key.type == BTRFS_METADATA_ITEM_KEY) + last = key.objectid + root->fs_info->nodesize; + else + last = key.objectid + key.offset; } next: path->slots[0]++; @@ -159,8 +164,7 @@ next: block_group->key.offset > last) { hole_size = block_group->key.objectid + block_group->key.offset - last; - set_extent_dirty(free_space_cache, last, - last + hole_size - 1, GFP_NOFS); + set_extent_dirty(free_space_cache, last, last + hole_size - 1); } remove_sb_from_cache(root, block_group); block_group->cached = 1; @@ -239,24 +243,22 @@ static int noinline find_search_start(struct btrfs_root *root, { int ret; struct btrfs_block_group_cache *cache = *cache_ret; - u64 last; + u64 
last = *start_ret; u64 start = 0; u64 end = 0; u64 search_start = *start_ret; int wrapped = 0; - if (!cache) { + if (!cache) goto out; - } again: ret = cache_block_group(root, cache); if (ret) goto out; last = max(search_start, cache->key.objectid); - if (cache->ro || !block_group_bits(cache, data)) { + if (cache->ro || !block_group_bits(cache, data)) goto new_group; - } while(1) { ret = find_first_extent_bit(&root->fs_info->free_space_cache, @@ -277,11 +279,12 @@ again: return 0; } out: + *start_ret = last; cache = btrfs_lookup_block_group(root->fs_info, search_start); if (!cache) { printk("Unable to find block group for %llu\n", (unsigned long long)search_start); - WARN_ON(1); + return -ENOENT; } return -ENOSPC; @@ -290,7 +293,6 @@ new_group: wrapped: cache = btrfs_lookup_first_block_group(root->fs_info, last); if (!cache) { -no_cache: if (!wrapped) { wrapped = 1; last = search_start; @@ -298,24 +300,10 @@ no_cache: } goto out; } - cache = btrfs_find_block_group(root, cache, last, data, 0); - cache = btrfs_find_block_group(root, cache, last, data, 0); - if (!cache) - goto no_cache; - *cache_ret = cache; goto again; } -static u64 div_factor(u64 num, int factor) -{ - if (factor == 10) - return num; - num *= factor; - num /= 10; - return num; -} - static int block_group_state_bits(u64 flags) { int bits = 0; @@ -328,10 +316,9 @@ static int block_group_state_bits(u64 flags) return bits; } -struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, - struct btrfs_block_group_cache - *hint, u64 search_start, - int data, int owner) +static struct btrfs_block_group_cache * +btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache + *hint, u64 search_start, int data, int owner) { struct btrfs_block_group_cache *cache; struct extent_io_tree *block_group_cache; @@ -574,7 +561,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans, break; } } - btrfs_release_path(root, path); + btrfs_release_path(path); if (owner < BTRFS_FIRST_FREE_OBJECTID) new_size += sizeof(*bi); @@ -585,7 +572,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans, return ret; BUG_ON(ret); - ret = btrfs_extend_item(trans, root, path, new_size); + ret = btrfs_extend_item(root, path, new_size); BUG_ON(ret); leaf = path->nodes[0]; @@ -609,7 +596,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans, } #endif -static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset) +u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset) { u32 high_crc = ~(u32)0; u32 low_crc = ~(u32)0; @@ -681,7 +668,7 @@ again: return 0; #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 key.type = BTRFS_EXTENT_REF_V0_KEY; - btrfs_release_path(root, path); + btrfs_release_path(path); ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) { err = ret; @@ -719,7 +706,7 @@ again: if (match_extent_data_ref(leaf, ref, root_objectid, owner, offset)) { if (recow) { - btrfs_release_path(root, path); + btrfs_release_path(path); goto again; } err = 0; @@ -780,7 +767,7 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, if (match_extent_data_ref(leaf, ref, root_objectid, owner, offset)) break; - btrfs_release_path(root, path); + btrfs_release_path(path); key.offset++; ret = btrfs_insert_empty_item(trans, root, path, &key, @@ -807,7 +794,7 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); ret = 0; fail: - btrfs_release_path(root, path); + btrfs_release_path(path); 
return ret; } @@ -868,8 +855,7 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, return ret; } -static noinline u32 extent_data_ref_count(struct btrfs_root *root, - struct btrfs_path *path, +static noinline u32 extent_data_ref_count(struct btrfs_path *path, struct btrfs_extent_inline_ref *iref) { struct btrfs_key key; @@ -933,7 +919,7 @@ static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans, ret = -ENOENT; #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 if (ret == -ENOENT && parent) { - btrfs_release_path(root, path); + btrfs_release_path(path); key.type = BTRFS_EXTENT_REF_V0_KEY; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret > 0) @@ -963,44 +949,25 @@ static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, root, path, &key, 0); - btrfs_release_path(root, path); + btrfs_release_path(path); return ret; } static inline int extent_ref_type(u64 parent, u64 owner) { + int type; if (owner < BTRFS_FIRST_FREE_OBJECTID) { if (parent > 0) - return BTRFS_SHARED_BLOCK_REF_KEY; + type = BTRFS_SHARED_BLOCK_REF_KEY; else - return BTRFS_TREE_BLOCK_REF_KEY; + type = BTRFS_TREE_BLOCK_REF_KEY; } else { if (parent > 0) - return BTRFS_SHARED_DATA_REF_KEY; - else - return BTRFS_EXTENT_DATA_REF_KEY; - } -} - -static int find_next_key(struct btrfs_path *path, struct btrfs_key *key) - -{ - int level; - for (level = 0; level < BTRFS_MAX_LEVEL; level++) { - if (!path->nodes[level]) - break; - if (path->slots[level] + 1 >= - btrfs_header_nritems(path->nodes[level])) - continue; - if (level == 0) - btrfs_item_key_to_cpu(path->nodes[level], key, - path->slots[level] + 1); + type = BTRFS_SHARED_DATA_REF_KEY; else - btrfs_node_key_to_cpu(path->nodes[level], key, - path->slots[level] + 1); - return 0; + type = BTRFS_EXTENT_DATA_REF_KEY; } - return 1; + return type; } static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, @@ -1024,6 +991,8 @@ static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, int want; int ret; int err = 0; + int skinny_metadata = + btrfs_fs_incompat(root->fs_info, SKINNY_METADATA); key.objectid = bytenr; key.type = BTRFS_EXTENT_ITEM_KEY; @@ -1034,11 +1003,44 @@ static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, extra_size = btrfs_extent_inline_ref_size(want); else extra_size = -1; + + if (owner < BTRFS_FIRST_FREE_OBJECTID && skinny_metadata) { + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = owner; + } else if (skinny_metadata) { + skinny_metadata = 0; + } + +again: ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1); if (ret < 0) { err = ret; goto out; } + + /* + * We may be a newly converted file system which still has the old fat + * extent entries for metadata, so try and see if we have one of those. 
+ */ + if (ret > 0 && skinny_metadata) { + skinny_metadata = 0; + if (path->slots[0]) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == bytenr && + key.type == BTRFS_EXTENT_ITEM_KEY && + key.offset == num_bytes) + ret = 0; + } + if (ret) { + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = num_bytes; + btrfs_release_path(path); + goto again; + } + } + if (ret) { printf("Failed to find [%llu, %u, %llu]\n", key.objectid, key.type, key.offset); return -ENOENT; @@ -1079,10 +1081,10 @@ static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, ptr = (unsigned long)(ei + 1); end = (unsigned long)ei + item_size; - if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { + if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) { ptr += sizeof(struct btrfs_tree_block_info); BUG_ON(ptr > end); - } else { + } else if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) { if (!(flags & BTRFS_EXTENT_FLAG_DATA)) { return -EIO; } @@ -1158,8 +1160,7 @@ out: return err; } -static int setup_inline_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int setup_inline_extent_backref(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_extent_inline_ref *iref, u64 parent, u64 root_objectid, @@ -1182,7 +1183,7 @@ static int setup_inline_extent_backref(struct btrfs_trans_handle *trans, type = extent_ref_type(parent, owner); size = btrfs_extent_inline_ref_size(type); - ret = btrfs_extend_item(trans, root, path, size); + ret = btrfs_extend_item(root, path, size); BUG_ON(ret); ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); @@ -1234,7 +1235,7 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans, if (ret != -ENOENT) return ret; - btrfs_release_path(root, path); + btrfs_release_path(path); *ref_ret = NULL; if (owner < BTRFS_FIRST_FREE_OBJECTID) { @@ -1302,7 +1303,7 @@ static int update_inline_extent_backref(struct btrfs_trans_handle *trans, memmove_extent_buffer(leaf, ptr, ptr + size, end - ptr - size); item_size -= size; - ret = btrfs_truncate_item(trans, root, path, item_size, 1); + ret = btrfs_truncate_item(root, path, item_size, 1); BUG_ON(ret); } btrfs_mark_buffer_dirty(leaf); @@ -1327,7 +1328,7 @@ static int insert_inline_extent_backref(struct btrfs_trans_handle *trans, ret = update_inline_extent_backref(trans, root, path, iref, refs_to_add); } else if (ret == -ENOENT) { - ret = setup_inline_extent_backref(trans, root, path, iref, + ret = setup_inline_extent_backref(root, path, iref, parent, root_objectid, owner, offset, refs_to_add); } @@ -1391,7 +1392,6 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, return -ENOMEM; path->reada = 1; - path->leave_spinning = 1; ret = insert_inline_extent_backref(trans, root->fs_info->extent_root, path, bytenr, num_bytes, parent, @@ -1410,10 +1410,9 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_set_extent_refs(leaf, item, refs + 1); btrfs_mark_buffer_dirty(leaf); - btrfs_release_path(root->fs_info->extent_root, path); + btrfs_release_path(path); path->reada = 1; - path->leave_spinning = 1; /* now insert the actual backref */ ret = insert_extent_backref(trans, root->fs_info->extent_root, @@ -1439,7 +1438,7 @@ int btrfs_extent_post_op(struct btrfs_trans_handle *trans, int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, - u64 num_bytes, u64 *refs, u64 *flags) + u64 offset, int metadata, u64 *refs, u64 *flags) { struct btrfs_path *path; int ret; @@ -1450,16 +1449,56 
@@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, u64 num_refs; u64 extent_flags; - WARN_ON(num_bytes < root->sectorsize); + if (metadata && + !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) { + offset = root->fs_info->nodesize; + metadata = 0; + } + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; path->reada = 1; + key.objectid = bytenr; - key.offset = num_bytes; - btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + key.offset = offset; + if (metadata) + key.type = BTRFS_METADATA_ITEM_KEY; + else + key.type = BTRFS_EXTENT_ITEM_KEY; + +again: ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 0); if (ret < 0) goto out; + + /* + * Deal with the fact that we may have mixed SKINNY and normal refs. If + * we didn't find what we wanted check and see if we have a normal ref + * right next to us, or re-search if we are on the edge of the leaf just + * to make sure. + */ + if (ret > 0 && metadata) { + if (path->slots[0]) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == bytenr && + key.type == BTRFS_EXTENT_ITEM_KEY && + key.offset == root->fs_info->nodesize) + ret = 0; + } + + if (ret) { + btrfs_release_path(path); + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = root->fs_info->nodesize; + metadata = 0; + goto again; + } + } + if (ret != 0) { ret = -EIO; goto out; @@ -1492,12 +1531,12 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, *flags = extent_flags; out: btrfs_free_path(path); - return 0; + return ret; } int btrfs_set_block_flags(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 bytenr, u64 num_bytes, u64 flags) + u64 bytenr, int level, u64 flags) { struct btrfs_path *path; int ret; @@ -1505,17 +1544,48 @@ int btrfs_set_block_flags(struct btrfs_trans_handle *trans, struct extent_buffer *l; struct btrfs_extent_item *item; u32 item_size; + int skinny_metadata = + btrfs_fs_incompat(root->fs_info, SKINNY_METADATA); - WARN_ON(num_bytes < root->sectorsize); path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; path->reada = 1; + key.objectid = bytenr; - key.offset = num_bytes; - btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + if (skinny_metadata) { + key.offset = level; + key.type = BTRFS_METADATA_ITEM_KEY; + } else { + key.offset = root->fs_info->nodesize; + key.type = BTRFS_EXTENT_ITEM_KEY; + } + +again: ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 0); if (ret < 0) goto out; + + if (ret > 0 && skinny_metadata) { + skinny_metadata = 0; + if (path->slots[0]) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == bytenr && + key.offset == root->fs_info->nodesize && + key.type == BTRFS_EXTENT_ITEM_KEY) + ret = 0; + } + if (ret) { + btrfs_release_path(path); + key.offset = root->fs_info->nodesize; + key.type = BTRFS_EXTENT_ITEM_KEY; + goto again; + } + } + if (ret != 0) { btrfs_print_leaf(root, path->nodes[0]); printk("failed to find block number %Lu\n", @@ -1586,7 +1656,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, cond_resched(); if (level == 0) { btrfs_item_key_to_cpu(buf, &key, i); - if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) + if (key.type != BTRFS_EXTENT_DATA_KEY) continue; fi = btrfs_item_ptr(buf, i, struct btrfs_file_extent_item); @@ -1608,7 +1678,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, } } else { bytenr = btrfs_node_blockptr(buf, i); - num_bytes = btrfs_level_size(root, level - 1); + num_bytes = 
root->fs_info->nodesize; ret = process_func(trans, root, bytenr, num_bytes, parent, ref_root, level - 1, 0); if (ret) { @@ -1655,7 +1725,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, bi = btrfs_item_ptr_offset(leaf, path->slots[0]); write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); btrfs_mark_buffer_dirty(leaf); - btrfs_release_path(extent_root, path); + btrfs_release_path(path); fail: finish_current_insert(trans, extent_root); pending_ret = del_pending_extents(trans, extent_root); @@ -1699,11 +1769,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, BUG_ON(ret); clear_extent_bits(block_group_cache, start, end, - BLOCK_GROUP_DIRTY, GFP_NOFS); + BLOCK_GROUP_DIRTY); cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; ret = write_one_cache_group(trans, root, path, cache); - BUG_ON(ret); } btrfs_free_path(path); return 0; @@ -1712,11 +1781,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, u64 flags) { - struct list_head *head = &info->space_info; - struct list_head *cur; struct btrfs_space_info *found; - list_for_each(cur, head) { - found = list_entry(cur, struct btrfs_space_info, list); + + flags &= BTRFS_BLOCK_GROUP_TYPE_MASK; + + list_for_each_entry(found, &info->space_info, list) { if (found->flags & flags) return found; } @@ -1724,6 +1793,31 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, } +static int free_space_info(struct btrfs_fs_info *fs_info, u64 flags, + u64 total_bytes, u64 bytes_used, + struct btrfs_space_info **space_info) +{ + struct btrfs_space_info *found; + + /* only support free block group which is empty */ + if (bytes_used) + return -ENOTEMPTY; + + found = __find_space_info(fs_info, flags); + if (!found) + return -ENOENT; + if (found->total_bytes < total_bytes) { + fprintf(stderr, + "WARNING: bad space info to free %llu only have %llu\n", + total_bytes, found->total_bytes); + return -EINVAL; + } + found->total_bytes -= total_bytes; + if (space_info) + *space_info = found; + return 0; +} + static int update_space_info(struct btrfs_fs_info *info, u64 flags, u64 total_bytes, u64 bytes_used, struct btrfs_space_info **space_info) @@ -1748,7 +1842,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, return -ENOMEM; list_add(&found->list, &info->space_info); - found->flags = flags; + found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; found->total_bytes = total_bytes; found->bytes_used = bytes_used; found->bytes_pinned = 0; @@ -1763,6 +1857,8 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10 | + BTRFS_BLOCK_GROUP_RAID5 | + BTRFS_BLOCK_GROUP_RAID6 | BTRFS_BLOCK_GROUP_DUP); if (extra_flags) { if (flags & BTRFS_BLOCK_GROUP_DATA) @@ -1775,7 +1871,7 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) } static int do_chunk_alloc(struct btrfs_trans_handle *trans, - struct btrfs_root *extent_root, u64 alloc_bytes, + struct btrfs_fs_info *fs_info, u64 alloc_bytes, u64 flags) { struct btrfs_space_info *space_info; @@ -1784,10 +1880,9 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 num_bytes; int ret; - space_info = __find_space_info(extent_root->fs_info, flags); + space_info = __find_space_info(fs_info, flags); if (!space_info) { - ret = update_space_info(extent_root->fs_info, flags, - 0, 
0, &space_info); + ret = update_space_info(fs_info, flags, 0, 0, &space_info); BUG_ON(ret); } BUG_ON(!space_info); @@ -1800,7 +1895,17 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, thresh) return 0; - ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, + /* + * Avoid allocating given chunk type + */ + if (fs_info->avoid_meta_chunk_alloc && + (flags & BTRFS_BLOCK_GROUP_METADATA)) + return 0; + if (fs_info->avoid_sys_chunk_alloc && + (flags & BTRFS_BLOCK_GROUP_SYSTEM)) + return 0; + + ret = btrfs_alloc_chunk(trans, fs_info, &start, &num_bytes, space_info->flags); if (ret == -ENOSPC) { space_info->full = 1; @@ -1809,14 +1914,13 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, BUG_ON(ret); - ret = btrfs_make_block_group(trans, extent_root, 0, space_info->flags, - BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes); + ret = btrfs_make_block_group(trans, fs_info, 0, space_info->flags, + start, num_bytes); BUG_ON(ret); return 0; } -static int update_block_group(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int update_block_group(struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc, int mark_free) { @@ -1829,12 +1933,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, u64 end; /* block accounting for super block */ - old_val = btrfs_super_bytes_used(&info->super_copy); + old_val = btrfs_super_bytes_used(info->super_copy); if (alloc) old_val += num_bytes; else old_val -= num_bytes; - btrfs_set_super_bytes_used(&info->super_copy, old_val); + btrfs_set_super_bytes_used(info->super_copy, old_val); /* block accounting for root item */ old_val = btrfs_root_used(&root->root_item); @@ -1854,7 +1958,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, start = cache->key.objectid; end = start + cache->key.offset - 1; set_extent_bits(&info->block_group_cache, start, end, - BLOCK_GROUP_DIRTY, GFP_NOFS); + BLOCK_GROUP_DIRTY); old_val = btrfs_block_group_used(&cache->item); num_bytes = min(total, cache->key.offset - byte_in_group); @@ -1867,8 +1971,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, cache->space_info->bytes_used -= num_bytes; if (mark_free) { set_extent_dirty(&info->free_space_cache, - bytenr, bytenr + num_bytes - 1, - GFP_NOFS); + bytenr, bytenr + num_bytes - 1); } } btrfs_set_block_group_used(&cache->item, old_val); @@ -1887,13 +1990,17 @@ static int update_pinned_extents(struct btrfs_root *root, if (pin) { set_extent_dirty(&fs_info->pinned_extents, - bytenr, bytenr + num - 1, GFP_NOFS); + bytenr, bytenr + num - 1); } else { clear_extent_dirty(&fs_info->pinned_extents, - bytenr, bytenr + num - 1, GFP_NOFS); + bytenr, bytenr + num - 1); } while (num > 0) { cache = btrfs_lookup_block_group(fs_info, bytenr); + if (!cache) { + len = min((u64)fs_info->sectorsize, num); + goto next; + } WARN_ON(!cache); len = min(num, cache->key.offset - (bytenr - cache->key.objectid)); @@ -1906,31 +2013,13 @@ static int update_pinned_extents(struct btrfs_root *root, cache->space_info->bytes_pinned -= len; fs_info->total_pinned -= len; } +next: bytenr += len; num -= len; } return 0; } -int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) -{ - u64 last = 0; - u64 start; - u64 end; - struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; - int ret; - - while(1) { - ret = find_first_extent_bit(pinned_extents, last, - &start, &end, EXTENT_DIRTY); - if (ret) - break; - set_extent_dirty(copy, start, end, GFP_NOFS); - last = end + 1; - } - return 0; -} - int 
btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_io_tree *unpin) @@ -1947,8 +2036,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, if (ret) break; update_pinned_extents(root, start, end + 1 - start, 0); - clear_extent_dirty(unpin, start, end, GFP_NOFS); - set_extent_dirty(free_space_cache, start, end, GFP_NOFS); + clear_extent_dirty(unpin, start, end); + set_extent_dirty(free_space_cache, start, end); } return 0; } @@ -1975,12 +2064,11 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, u64 end; u64 priv; struct btrfs_fs_info *info = extent_root->fs_info; - struct btrfs_path *path; struct pending_extent_op *extent_op; struct btrfs_key key; int ret; - - path = btrfs_alloc_path(); + int skinny_metadata = + btrfs_fs_incompat(extent_root->fs_info, SKINNY_METADATA); while(1) { ret = find_first_extent_bit(&info->extent_ins, 0, &start, @@ -1994,23 +2082,27 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, if (extent_op->type == PENDING_EXTENT_INSERT) { key.objectid = start; - key.offset = end + 1 - start; - key.type = BTRFS_EXTENT_ITEM_KEY; + if (skinny_metadata) { + key.offset = extent_op->level; + key.type = BTRFS_METADATA_ITEM_KEY; + } else { + key.offset = extent_op->num_bytes; + key.type = BTRFS_EXTENT_ITEM_KEY; + } ret = alloc_reserved_tree_block(trans, extent_root, extent_root->root_key.objectid, trans->transid, extent_op->flags, &extent_op->key, - extent_op->level, &key); + extent_op->level, &key); + BUG_ON(ret); } else { BUG_ON(1); } - clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, - GFP_NOFS); + clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED); kfree(extent_op); } - btrfs_free_path(path); return 0; } @@ -2024,7 +2116,7 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, if (is_data) goto pinit; - buf = btrfs_find_tree_block(root, bytenr, num_bytes); + buf = btrfs_find_tree_block(root->fs_info, bytenr, num_bytes); if (!buf) goto pinit; @@ -2058,6 +2150,12 @@ void btrfs_pin_extent(struct btrfs_fs_info *fs_info, update_pinned_extents(fs_info->extent_root, bytenr, num_bytes, 1); } +void btrfs_unpin_extent(struct btrfs_fs_info *fs_info, + u64 bytenr, u64 num_bytes) +{ + update_pinned_extents(fs_info->extent_root, bytenr, num_bytes, 0); +} + /* * remove an extent from the root, returns 0 on success */ @@ -2070,7 +2168,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_key key; struct btrfs_path *path; - struct btrfs_extent_ops *ops = root->fs_info->extent_ops; struct btrfs_root *extent_root = root->fs_info->extent_root; struct extent_buffer *leaf; struct btrfs_extent_item *ei; @@ -2082,6 +2179,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, int num_to_del = 1; u32 item_size; u64 refs; + int skinny_metadata = + btrfs_fs_incompat(extent_root->fs_info, SKINNY_METADATA); if (root->fs_info->free_extent_hook) { root->fs_info->free_extent_hook(trans, root, bytenr, num_bytes, @@ -2094,9 +2193,10 @@ static int __free_extent(struct btrfs_trans_handle *trans, return -ENOMEM; path->reada = 1; - path->leave_spinning = 1; is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID; + if (is_data) + skinny_metadata = 0; BUG_ON(!is_data && refs_to_drop != 1); ret = lookup_extent_backref(trans, extent_root, path, &iref, @@ -2115,6 +2215,11 @@ static int __free_extent(struct btrfs_trans_handle *trans, found_extent = 1; break; } + if (key.type == BTRFS_METADATA_ITEM_KEY && + key.offset == owner_objectid) { + found_extent = 1; 
+ break; + } if (path->slots[0] - extent_slot > 5) break; extent_slot--; @@ -2130,15 +2235,40 @@ static int __free_extent(struct btrfs_trans_handle *trans, NULL, refs_to_drop, is_data); BUG_ON(ret); - btrfs_release_path(extent_root, path); - path->leave_spinning = 1; + btrfs_release_path(path); key.objectid = bytenr; - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = num_bytes; + + if (skinny_metadata) { + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = owner_objectid; + } else { + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = num_bytes; + } ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); + if (ret > 0 && skinny_metadata && path->slots[0]) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], + &key, + path->slots[0]); + if (key.objectid == bytenr && + key.type == BTRFS_EXTENT_ITEM_KEY && + key.offset == num_bytes) + ret = 0; + } + + if (ret > 0 && skinny_metadata) { + skinny_metadata = 0; + btrfs_release_path(path); + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = num_bytes; + ret = btrfs_search_slot(trans, extent_root, + &key, path, -1, 1); + } + if (ret) { printk(KERN_ERR "umm, got %d back from search" ", was looking for %llu\n", ret, @@ -2169,8 +2299,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, owner_objectid, 0); BUG_ON(ret < 0); - btrfs_release_path(extent_root, path); - path->leave_spinning = 1; + btrfs_release_path(path); key.objectid = bytenr; key.type = BTRFS_EXTENT_ITEM_KEY; @@ -2193,7 +2322,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, BUG_ON(item_size < sizeof(*ei)); ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item); - if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { + if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID && + key.type == BTRFS_EXTENT_ITEM_KEY) { struct btrfs_tree_block_info *bi; BUG_ON(item_size < sizeof(*ei) + sizeof(*bi)); bi = (struct btrfs_tree_block_info *)(ei + 1); @@ -2227,7 +2357,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, if (found_extent) { BUG_ON(is_data && refs_to_drop != - extent_data_ref_count(root, path, iref)); + extent_data_ref_count(path, iref)); if (iref) { BUG_ON(path->slots[0] != extent_slot); } else { @@ -2237,14 +2367,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, } } - if (ops && ops->free_extent) { - ret = ops->free_extent(root, bytenr, num_bytes); - if (ret > 0) { - pin = 0; - mark_free = 0; - } - } - if (pin) { ret = pin_down_bytes(trans, root, bytenr, num_bytes, is_data); @@ -2256,16 +2378,14 @@ static int __free_extent(struct btrfs_trans_handle *trans, ret = btrfs_del_items(trans, extent_root, path, path->slots[0], num_to_del); BUG_ON(ret); - btrfs_release_path(extent_root, path); + btrfs_release_path(path); if (is_data) { ret = btrfs_del_csums(trans, root, bytenr, num_bytes); BUG_ON(ret); } - ret = update_block_group(trans, root, bytenr, num_bytes, 0, - mark_free); - BUG_ON(ret); + update_block_group(root, bytenr, num_bytes, 0, mark_free); } fail: btrfs_free_path(path); @@ -2302,8 +2422,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct BUG_ON(ret); extent_op = (struct pending_extent_op *)(unsigned long)priv; - clear_extent_bits(pending_del, start, end, EXTENT_LOCKED, - GFP_NOFS); + clear_extent_bits(pending_del, start, end, EXTENT_LOCKED); if (!test_range_bit(extent_ins, start, end, EXTENT_LOCKED, 0)) { @@ -2320,7 +2439,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct (unsigned long)priv; clear_extent_bits(extent_ins, start, end, - EXTENT_LOCKED, GFP_NOFS); + 
EXTENT_LOCKED); if (extent_op->type == PENDING_BACKREF_UPDATE) BUG_ON(1); @@ -2333,6 +2452,17 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct return err; } + +int btrfs_free_tree_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, + u64 parent, int last_ref) +{ + return btrfs_free_extent(trans, root, buf->start, buf->len, parent, + root->root_key.objectid, + btrfs_header_level(buf), 0); +} + /* * remove an extent from the root, returns 0 on success */ @@ -2346,7 +2476,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, int pending_ret; int ret; - WARN_ON(num_bytes < root->sectorsize); + WARN_ON(num_bytes < root->fs_info->sectorsize); if (root == extent_root) { struct pending_extent_op *extent_op; @@ -2360,7 +2490,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, set_extent_bits(&root->fs_info->pending_del, bytenr, bytenr + num_bytes - 1, - EXTENT_LOCKED, GFP_NOFS); + EXTENT_LOCKED); set_state_private(&root->fs_info->pending_del, bytenr, (unsigned long)extent_op); return 0; @@ -2373,9 +2503,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, static u64 stripe_align(struct btrfs_root *root, u64 val) { - u64 mask = ((u64)root->stripesize - 1); - u64 ret = (val + mask) & ~mask; - return ret; + return round_up(val, (u64)root->fs_info->stripesize); } /* @@ -2403,8 +2531,10 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, int full_scan = 0; int wrapped = 0; - WARN_ON(num_bytes < root->sectorsize); - btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); + WARN_ON(num_bytes < info->sectorsize); + ins->type = BTRFS_EXTENT_ITEM_KEY; + + search_start = stripe_align(root, search_start); if (hint_byte) { block_group = btrfs_lookup_first_block_group(info, hint_byte); @@ -2421,6 +2551,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, total_needed += empty_size; check_failed: + search_start = stripe_align(root, search_start); if (!block_group) { block_group = btrfs_lookup_first_block_group(info, search_start); @@ -2431,9 +2562,8 @@ check_failed: ret = find_search_start(root, &block_group, &search_start, total_needed, data); if (ret) - goto error; + goto new_group; - search_start = stripe_align(root, search_start); ins->objectid = search_start; ins->offset = num_bytes; @@ -2456,6 +2586,13 @@ check_failed: goto new_group; } + if (info->excluded_extents && + test_range_bit(info->excluded_extents, ins->objectid, + ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) { + search_start = ins->objectid + num_bytes; + goto new_group; + } + if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start && ins->objectid < exclude_start + exclude_nr)) { search_start = exclude_start + exclude_nr; @@ -2463,6 +2600,21 @@ check_failed: } if (!(data & BTRFS_BLOCK_GROUP_DATA)) { + if (check_crossing_stripes(info, ins->objectid, num_bytes)) { + struct btrfs_block_group_cache *bg_cache; + u64 bg_offset; + + bg_cache = btrfs_lookup_block_group(info, ins->objectid); + if (!bg_cache) + goto no_bg_cache; + bg_offset = ins->objectid - bg_cache->key.objectid; + + search_start = round_up( + bg_offset + num_bytes, BTRFS_STRIPE_LEN) + + bg_cache->key.objectid; + goto new_group; + } +no_bg_cache: block_group = btrfs_lookup_block_group(info, ins->objectid); if (block_group) trans->block_group = block_group; @@ -2494,61 +2646,53 @@ error: return ret; } -static int btrfs_reserve_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 num_bytes, u64 empty_size, - u64 hint_byte, u64 
search_end, - struct btrfs_key *ins, int data) +int btrfs_reserve_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 num_bytes, u64 empty_size, + u64 hint_byte, u64 search_end, + struct btrfs_key *ins, bool is_data) { int ret; u64 search_start = 0; u64 alloc_profile; + u64 profile; struct btrfs_fs_info *info = root->fs_info; - if (info->extent_ops) { - struct btrfs_extent_ops *ops = info->extent_ops; - ret = ops->alloc_extent(root, num_bytes, hint_byte, ins); - BUG_ON(ret); - goto found; - } - - if (data) { + if (is_data) { alloc_profile = info->avail_data_alloc_bits & info->data_alloc_profile; - data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; - } else if ((info->system_allocs > 0 || root == info->chunk_root) && - info->system_allocs >= 0) { + profile = BTRFS_BLOCK_GROUP_DATA | alloc_profile; + } else if (info->system_allocs == 1 || root == info->chunk_root) { alloc_profile = info->avail_system_alloc_bits & info->system_alloc_profile; - data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; + profile = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; } else { alloc_profile = info->avail_metadata_alloc_bits & info->metadata_alloc_profile; - data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; + profile = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; } if (root->ref_cows) { - if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { - ret = do_chunk_alloc(trans, root->fs_info->extent_root, + if (!(profile & BTRFS_BLOCK_GROUP_METADATA)) { + ret = do_chunk_alloc(trans, info, num_bytes, BTRFS_BLOCK_GROUP_METADATA); BUG_ON(ret); } - ret = do_chunk_alloc(trans, root->fs_info->extent_root, - num_bytes + 2 * 1024 * 1024, data); + ret = do_chunk_alloc(trans, info, + num_bytes + SZ_2M, profile); BUG_ON(ret); } - WARN_ON(num_bytes < root->sectorsize); + WARN_ON(num_bytes < info->sectorsize); ret = find_free_extent(trans, root, num_bytes, empty_size, search_start, search_end, hint_byte, ins, trans->alloc_exclude_start, - trans->alloc_exclude_nr, data); - BUG_ON(ret); -found: - clear_extent_dirty(&root->fs_info->free_space_cache, - ins->objectid, ins->objectid + ins->offset - 1, - GFP_NOFS); + trans->alloc_exclude_nr, profile); + if (ret < 0) + return ret; + clear_extent_dirty(&info->free_space_cache, + ins->objectid, ins->objectid + ins->offset - 1); return ret; } @@ -2565,12 +2709,16 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, struct btrfs_extent_inline_ref *iref; struct btrfs_path *path; struct extent_buffer *leaf; - u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); + u32 size = sizeof(*extent_item) + sizeof(*iref); + int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA); + + if (!skinny_metadata) + size += sizeof(*block_info); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; - path->leave_spinning = 1; ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, ins, size); BUG_ON(ret); @@ -2582,26 +2730,24 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, btrfs_set_extent_generation(leaf, extent_item, generation); btrfs_set_extent_flags(leaf, extent_item, flags | BTRFS_EXTENT_FLAG_TREE_BLOCK); - block_info = (struct btrfs_tree_block_info *)(extent_item + 1); - btrfs_set_tree_block_key(leaf, block_info, key); - btrfs_set_tree_block_level(leaf, block_info, level); + if (skinny_metadata) { + iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); + } else { + block_info = (struct btrfs_tree_block_info *)(extent_item + 1); + btrfs_set_tree_block_key(leaf, block_info, key); + 
btrfs_set_tree_block_level(leaf, block_info, level); + iref = (struct btrfs_extent_inline_ref *)(block_info + 1); + } - iref = (struct btrfs_extent_inline_ref *)(block_info + 1); btrfs_set_extent_inline_ref_type(leaf, iref, BTRFS_TREE_BLOCK_REF_KEY); btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); btrfs_mark_buffer_dirty(leaf); btrfs_free_path(path); - ret = update_block_group(trans, root, ins->objectid, ins->offset, + ret = update_block_group(root, ins->objectid, fs_info->nodesize, 1, 0); - if (ret) { - printk(KERN_ERR "btrfs update block group failed for %llu " - "%llu\n", (unsigned long long)ins->objectid, - (unsigned long long)ins->offset); - BUG(); - } return ret; } @@ -2632,10 +2778,14 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans, set_extent_bits(&root->fs_info->extent_ins, ins->objectid, ins->objectid + ins->offset - 1, - EXTENT_LOCKED, GFP_NOFS); + EXTENT_LOCKED); set_state_private(&root->fs_info->extent_ins, ins->objectid, (unsigned long)extent_op); } else { + if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) { + ins->offset = level; + ins->type = BTRFS_METADATA_ITEM_KEY; + } ret = alloc_reserved_tree_block(trans, root, root_objectid, generation, flags, key, level, ins); @@ -2667,7 +2817,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return ERR_PTR(ret); } - buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize); + buf = btrfs_find_create_tree_block(root->fs_info, ins.objectid); if (!buf) { btrfs_free_extent(trans, root, ins.objectid, ins.offset, 0, root->root_key.objectid, level, 0); @@ -2746,8 +2896,8 @@ static void noinline reada_walk_down(struct btrfs_root *root, for (i = slot; i < nritems && skipped < 32; i++) { bytenr = btrfs_node_blockptr(node, i); - if (last && ((bytenr > last && bytenr - last > 32 * 1024) || - (last > bytenr && last - bytenr > 32 * 1024))) { + if (last && ((bytenr > last && bytenr - last > SZ_32K) || + (last > bytenr && last - bytenr > SZ_32K))) { skipped++; continue; } @@ -2844,6 +2994,13 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, next = read_tree_block(root, bytenr, blocksize, ptr_gen); mutex_lock(&root->fs_info->fs_mutex); + if (!extent_buffer_uptodate(next)) { + if (IS_ERR(next)) + ret = PTR_ERR(next); + else + ret = -EIO; + break; + } } WARN_ON(*level <= 0); if (path->nodes[*level-1]) @@ -2928,109 +3085,53 @@ static int noinline walk_up_tree(struct btrfs_trans_handle *trans, return 1; } -/* - * drop the reference count on the tree rooted at 'snap'. This traverses - * the tree freeing any blocks that have a ref count of zero after being - * decremented. 
- */ -int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root - *root) -{ - int ret = 0; - int wret; - int level; - struct btrfs_path *path; - int i; - int orig_level; - struct btrfs_root_item *root_item = &root->root_item; - - path = btrfs_alloc_path(); - BUG_ON(!path); - - level = btrfs_header_level(root->node); - orig_level = level; - if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { - path->nodes[level] = root->node; - extent_buffer_get(root->node); - path->slots[level] = 0; - } else { - struct btrfs_key key; - struct btrfs_disk_key found_key; - struct extent_buffer *node; - - btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); - level = root_item->drop_level; - path->lowest_level = level; - wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (wret < 0) { - ret = wret; - goto out; - } - node = path->nodes[level]; - btrfs_node_key(node, &found_key, path->slots[level]); - WARN_ON(memcmp(&found_key, &root_item->drop_progress, - sizeof(found_key))); - } - while(1) { - wret = walk_down_tree(trans, root, path, &level); - if (wret < 0) - ret = wret; - if (wret != 0) - break; - - wret = walk_up_tree(trans, root, path, &level); - if (wret < 0) - ret = wret; - if (wret != 0) - break; - /* - ret = -EAGAIN; - break; - */ - } - for (i = 0; i <= orig_level; i++) { - if (path->nodes[i]) { - free_extent_buffer(path->nodes[i]); - path->nodes[i] = NULL; - } - } -out: - btrfs_free_path(path); - return ret; -} - #endif int btrfs_free_block_groups(struct btrfs_fs_info *info) { + struct btrfs_space_info *sinfo; + struct btrfs_block_group_cache *cache; u64 start; u64 end; u64 ptr; int ret; + while(1) { ret = find_first_extent_bit(&info->block_group_cache, 0, &start, &end, (unsigned int)-1); if (ret) break; ret = get_state_private(&info->block_group_cache, start, &ptr); - if (!ret) - kfree((void *)(unsigned long)ptr); + if (!ret) { + cache = u64_to_ptr(ptr); + if (cache->free_space_ctl) { + btrfs_remove_free_space_cache(cache); + kfree(cache->free_space_ctl); + } + kfree(cache); + } clear_extent_bits(&info->block_group_cache, start, - end, (unsigned int)-1, GFP_NOFS); + end, (unsigned int)-1); } while(1) { ret = find_first_extent_bit(&info->free_space_cache, 0, &start, &end, EXTENT_DIRTY); if (ret) break; - clear_extent_dirty(&info->free_space_cache, start, - end, GFP_NOFS); + clear_extent_dirty(&info->free_space_cache, start, end); + } + + while (!list_empty(&info->space_info)) { + sinfo = list_entry(info->space_info.next, + struct btrfs_space_info, list); + list_del_init(&sinfo->list); + kfree(sinfo); } return 0; } -int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *key) +static int find_first_block_group(struct btrfs_root *root, + struct btrfs_path *path, struct btrfs_key *key) { int ret; struct btrfs_key found_key; @@ -3063,6 +3164,54 @@ error: return ret; } +static void account_super_bytes(struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *cache) +{ + u64 bytenr; + u64 *logical; + int stripe_len; + int i, nr, ret; + + if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) { + stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid; + cache->bytes_super += stripe_len; + } + + for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { + bytenr = btrfs_sb_offset(i); + ret = btrfs_rmap_block(fs_info, + cache->key.objectid, bytenr, + 0, &logical, &nr, &stripe_len); + if (ret) + return; + + while (nr--) { + u64 start, len; + + if (logical[nr] > cache->key.objectid + + cache->key.offset) + continue; + + if (logical[nr] 
+ stripe_len <= cache->key.objectid) + continue; + + start = logical[nr]; + if (start < cache->key.objectid) { + start = cache->key.objectid; + len = (logical[nr] + stripe_len) - start; + } else { + len = min_t(u64, stripe_len, + cache->key.objectid + + cache->key.offset - start); + } + + cache->bytes_super += len; + } + + kfree(logical); + } +} + int btrfs_read_block_groups(struct btrfs_root *root) { struct btrfs_path *path; @@ -3081,7 +3230,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) root = info->extent_root; key.objectid = 0; key.offset = 0; - btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); + key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -3097,10 +3246,11 @@ int btrfs_read_block_groups(struct btrfs_root *root) } leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + cache = kzalloc(sizeof(*cache), GFP_NOFS); if (!cache) { ret = -ENOMEM; - break; + goto error; } read_extent_buffer(leaf, &cache->item, @@ -3110,7 +3260,20 @@ int btrfs_read_block_groups(struct btrfs_root *root) cache->cached = 0; cache->pinned = 0; key.objectid = found_key.objectid + found_key.offset; - btrfs_release_path(root, path); + if (found_key.offset == 0) + key.objectid++; + btrfs_release_path(path); + + /* + * Skip 0 sized block group, don't insert them into block + * group cache tree, as its length is 0, it won't get + * freed at close_ctree() time. + */ + if (found_key.offset == 0) { + free(cache); + continue; + } + cache->flags = btrfs_block_group_flags(&cache->item); bit = 0; if (cache->flags & BTRFS_BLOCK_GROUP_DATA) { @@ -3121,9 +3284,11 @@ int btrfs_read_block_groups(struct btrfs_root *root) bit = BLOCK_GROUP_METADATA; } set_avail_alloc_bits(info, cache->flags); - if (btrfs_chunk_readonly(root, cache->key.objectid)) + if (btrfs_chunk_readonly(info, cache->key.objectid)) cache->ro = 1; + account_super_bytes(info, cache); + ret = update_space_info(info, cache->flags, found_key.offset, btrfs_block_group_used(&cache->item), &space_info); @@ -3133,7 +3298,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) /* use EXTENT_LOCKED to prevent merging */ set_extent_bits(block_group_cache, found_key.objectid, found_key.objectid + found_key.offset - 1, - bit | EXTENT_LOCKED, GFP_NOFS); + bit | EXTENT_LOCKED); set_state_private(block_group_cache, found_key.objectid, (unsigned long)cache); } @@ -3143,50 +3308,67 @@ error: return ret; } -int btrfs_make_block_group(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytes_used, - u64 type, u64 chunk_objectid, u64 chunk_offset, - u64 size) +struct btrfs_block_group_cache * +btrfs_add_block_group(struct btrfs_fs_info *fs_info, u64 bytes_used, u64 type, + u64 chunk_offset, u64 size) { int ret; int bit = 0; - struct btrfs_root *extent_root; struct btrfs_block_group_cache *cache; struct extent_io_tree *block_group_cache; - extent_root = root->fs_info->extent_root; - block_group_cache = &root->fs_info->block_group_cache; + block_group_cache = &fs_info->block_group_cache; cache = kzalloc(sizeof(*cache), GFP_NOFS); BUG_ON(!cache); cache->key.objectid = chunk_offset; cache->key.offset = size; - btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); + cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; btrfs_set_block_group_used(&cache->item, bytes_used); - btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); + btrfs_set_block_group_chunk_objectid(&cache->item, + BTRFS_FIRST_CHUNK_TREE_OBJECTID); cache->flags = type; 
btrfs_set_block_group_flags(&cache->item, type); - ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, + account_super_bytes(fs_info, cache); + ret = update_space_info(fs_info, cache->flags, size, bytes_used, &cache->space_info); BUG_ON(ret); bit = block_group_state_bits(type); - set_extent_bits(block_group_cache, chunk_offset, - chunk_offset + size - 1, - bit | EXTENT_LOCKED, GFP_NOFS); + ret = set_extent_bits(block_group_cache, chunk_offset, + chunk_offset + size - 1, + bit | EXTENT_LOCKED); + BUG_ON(ret); + + ret = set_state_private(block_group_cache, chunk_offset, + (unsigned long)cache); + BUG_ON(ret); + set_avail_alloc_bits(fs_info, type); + + return cache; +} + +int btrfs_make_block_group(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 bytes_used, + u64 type, u64 chunk_offset, u64 size) +{ + int ret; + struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_block_group_cache *cache; - set_state_private(block_group_cache, chunk_offset, - (unsigned long)cache); + cache = btrfs_add_block_group(fs_info, bytes_used, type, chunk_offset, + size); ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, sizeof(cache->item)); BUG_ON(ret); - finish_current_insert(trans, extent_root); + ret = finish_current_insert(trans, extent_root); + BUG_ON(ret); ret = del_pending_extents(trans, extent_root); BUG_ON(ret); - set_avail_alloc_bits(extent_root->fs_info, type); + return 0; } @@ -3198,7 +3380,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, * before doing any block allocation. */ int btrfs_make_block_groups(struct btrfs_trans_handle *trans, - struct btrfs_root *root) + struct btrfs_fs_info *fs_info) { u64 total_bytes; u64 cur_start; @@ -3210,15 +3392,14 @@ int btrfs_make_block_groups(struct btrfs_trans_handle *trans, u64 chunk_objectid; int ret; int bit; - struct btrfs_root *extent_root; + struct btrfs_root *extent_root = fs_info->extent_root; struct btrfs_block_group_cache *cache; struct extent_io_tree *block_group_cache; - extent_root = root->fs_info->extent_root; - block_group_cache = &root->fs_info->block_group_cache; + block_group_cache = &fs_info->block_group_cache; chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; - total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); - group_align = 64 * root->sectorsize; + total_bytes = btrfs_super_total_bytes(fs_info->super_copy); + group_align = 64 * fs_info->sectorsize; cur_start = 0; while (cur_start < total_bytes) { @@ -3229,19 +3410,18 @@ int btrfs_make_block_groups(struct btrfs_trans_handle *trans, group_type = BTRFS_BLOCK_GROUP_SYSTEM; group_size /= 4; group_size &= ~(group_align - 1); - group_size = max_t(u64, group_size, 8 * 1024 * 1024); - group_size = min_t(u64, group_size, 32 * 1024 * 1024); + group_size = max_t(u64, group_size, SZ_8M); + group_size = min_t(u64, group_size, SZ_32M); } else { group_size &= ~(group_align - 1); if (total_data >= total_metadata * 2) { group_type = BTRFS_BLOCK_GROUP_METADATA; - group_size = min_t(u64, group_size, - 1ULL * 1024 * 1024 * 1024); + group_size = min_t(u64, group_size, SZ_1G); total_metadata += group_size; } else { group_type = BTRFS_BLOCK_GROUP_DATA; group_size = min_t(u64, group_size, - 5ULL * 1024 * 1024 * 1024); + 5ULL * SZ_1G); total_data += group_size; } if ((total_bytes - cur_start) * 4 < group_size * 5) @@ -3253,7 +3433,7 @@ int btrfs_make_block_groups(struct btrfs_trans_handle *trans, cache->key.objectid = cur_start; cache->key.offset = group_size; - btrfs_set_key_type(&cache->key, 
BTRFS_BLOCK_GROUP_ITEM_KEY); + cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; btrfs_set_block_group_used(&cache->item, 0); btrfs_set_block_group_chunk_objectid(&cache->item, @@ -3262,14 +3442,14 @@ int btrfs_make_block_groups(struct btrfs_trans_handle *trans, cache->flags = group_type; - ret = update_space_info(root->fs_info, group_type, group_size, + ret = update_space_info(fs_info, group_type, group_size, 0, &cache->space_info); BUG_ON(ret); - set_avail_alloc_bits(extent_root->fs_info, group_type); + set_avail_alloc_bits(fs_info, group_type); set_extent_bits(block_group_cache, cur_start, cur_start + group_size - 1, - bit | EXTENT_LOCKED, GFP_NOFS); + bit | EXTENT_LOCKED); set_state_private(block_group_cache, cur_start, (unsigned long)cache); cur_start += group_size; @@ -3277,7 +3457,7 @@ int btrfs_make_block_groups(struct btrfs_trans_handle *trans, /* then insert all the items */ cur_start = 0; while(cur_start < total_bytes) { - cache = btrfs_lookup_block_group(root->fs_info, cur_start); + cache = btrfs_lookup_block_group(fs_info, cur_start); BUG_ON(!cache); ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, @@ -3293,93 +3473,363 @@ int btrfs_make_block_groups(struct btrfs_trans_handle *trans, return 0; } -int btrfs_update_block_group(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +int btrfs_update_block_group(struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc, int mark_free) { - return update_block_group(trans, root, bytenr, num_bytes, + return update_block_group(root, bytenr, num_bytes, alloc, mark_free); } -static int btrfs_count_extents_in_block_group(struct btrfs_root *root, - struct btrfs_path *path, u64 start, - u64 len, - u64 *total) +/* + * Just remove a block group item in extent tree + * Caller should ensure the block group is empty and all space is pinned. + * Or new tree block/data may be allocated into it. 
+ */ +static int free_block_group_item(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 bytenr, u64 len) { + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_root *root = fs_info->extent_root; + int ret = 0; + + key.objectid = bytenr; + key.offset = len; + key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) { + ret = -ENOENT; + goto out; + } + if (ret < 0) + goto out; + + ret = btrfs_del_item(trans, root, path); +out: + btrfs_free_path(path); + return ret; +} + +static int free_dev_extent_item(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 devid, u64 dev_offset) +{ + struct btrfs_root *root = fs_info->dev_root; + struct btrfs_path *path; struct btrfs_key key; - struct extent_buffer *leaf; - u64 bytes_used = 0; int ret; - int slot; - key.offset = 0; - key.objectid = start; - btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - ret = btrfs_search_slot(NULL, root->fs_info->extent_root, - &key, path, 0, 0); + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = devid; + key.type = BTRFS_DEV_EXTENT_KEY; + key.offset = dev_offset; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) - return ret; - while(1) { - leaf = path->nodes[0]; - slot = path->slots[0]; - if (slot >= btrfs_header_nritems(leaf)) { - ret = btrfs_next_leaf(root, path); - if (ret < 0) - return ret; - if (ret > 0) - break; - leaf = path->nodes[0]; - slot = path->slots[0]; + goto out; + if (ret > 0) { + ret = -ENOENT; + goto out; + } + + ret = btrfs_del_item(trans, root, path); +out: + btrfs_free_path(path); + return ret; +} + +static int free_chunk_dev_extent_items(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 chunk_offset) +{ + struct btrfs_chunk *chunk = NULL; + struct btrfs_root *root= fs_info->chunk_root; + struct btrfs_path *path; + struct btrfs_key key; + u16 num_stripes; + int i; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + key.type = BTRFS_CHUNK_ITEM_KEY; + key.offset = chunk_offset; + + ret = btrfs_search_slot(trans, root, &key, path, 0, 0); + if (ret < 0) + goto out; + if (ret > 0) { + ret = -ENOENT; + goto out; + } + chunk = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_chunk); + num_stripes = btrfs_chunk_num_stripes(path->nodes[0], chunk); + for (i = 0; i < num_stripes; i++) { + ret = free_dev_extent_item(trans, fs_info, + btrfs_stripe_devid_nr(path->nodes[0], chunk, i), + btrfs_stripe_offset_nr(path->nodes[0], chunk, i)); + if (ret < 0) + goto out; + } +out: + btrfs_free_path(path); + return ret; +} + +static int free_system_chunk_item(struct btrfs_super_block *super, + struct btrfs_key *key) +{ + struct btrfs_disk_key *disk_key; + struct btrfs_key cpu_key; + u32 array_size = btrfs_super_sys_array_size(super); + char *ptr = (char *)super->sys_chunk_array; + int cur = 0; + int ret = -ENOENT; + + while (cur < btrfs_super_sys_array_size(super)) { + struct btrfs_chunk *chunk; + u32 num_stripes; + u32 chunk_len; + + disk_key = (struct btrfs_disk_key *)(ptr + cur); + btrfs_disk_key_to_cpu(&cpu_key, disk_key); + if (cpu_key.type != BTRFS_CHUNK_ITEM_KEY) { + /* just in case */ + ret = -EIO; + goto out; } - btrfs_item_key_to_cpu(leaf, &key, slot); - if (key.objectid > start + len) - break; - if (key.type == BTRFS_EXTENT_ITEM_KEY) - bytes_used += key.offset; - 
path->slots[0]++;
+
+		chunk = (struct btrfs_chunk *)(ptr + cur + sizeof(*disk_key));
+		num_stripes = btrfs_stack_chunk_num_stripes(chunk);
+		chunk_len = btrfs_chunk_item_size(num_stripes) +
+			    sizeof(*disk_key);
+
+		if (key->objectid == cpu_key.objectid &&
+		    key->offset == cpu_key.offset &&
+		    key->type == cpu_key.type) {
+			memmove(ptr + cur, ptr + cur + chunk_len,
+				array_size - cur - chunk_len);
+			array_size -= chunk_len;
+			btrfs_set_super_sys_array_size(super, array_size);
+			ret = 0;
+			goto out;
+		}
+
+		cur += chunk_len;
 	}
-	*total = bytes_used;
-	btrfs_release_path(root, path);
-	return 0;
+out:
+	return ret;
 }
 
-int btrfs_check_block_accounting(struct btrfs_root *root)
+static int free_chunk_item(struct btrfs_trans_handle *trans,
+			   struct btrfs_fs_info *fs_info,
+			   u64 bytenr)
 {
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	struct btrfs_root *root = fs_info->chunk_root;
+	struct btrfs_chunk *chunk;
+	u64 chunk_type;
 	int ret;
-	u64 start = 0;
-	u64 bytes_used = 0;
-	struct btrfs_path path;
+
+	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+	key.offset = bytenr;
+	key.type = BTRFS_CHUNK_ITEM_KEY;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+	if (ret > 0) {
+		ret = -ENOENT;
+		goto out;
+	}
+	if (ret < 0)
+		goto out;
+	chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			       struct btrfs_chunk);
+	chunk_type = btrfs_chunk_type(path->nodes[0], chunk);
+
+	ret = btrfs_del_item(trans, root, path);
+	if (ret < 0)
+		goto out;
+
+	if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
+		ret = free_system_chunk_item(fs_info->super_copy, &key);
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+static u64 get_dev_extent_len(struct map_lookup *map)
+{
+	int div;
+
+	switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+	case 0: /* Single */
+	case BTRFS_BLOCK_GROUP_DUP:
+	case BTRFS_BLOCK_GROUP_RAID1:
+		div = 1;
+		break;
+	case BTRFS_BLOCK_GROUP_RAID5:
+		div = (map->num_stripes - 1);
+		break;
+	case BTRFS_BLOCK_GROUP_RAID6:
+		div = (map->num_stripes - 2);
+		break;
+	case BTRFS_BLOCK_GROUP_RAID10:
+		div = (map->num_stripes / map->sub_stripes);
+		break;
+	default:
+		/* normally the chunk read-time checks should have handled it */
+		BUG_ON(1);
+	}
+	return map->ce.size / div;
+}
+
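The divisor table in get_dev_extent_len() above is easy to sanity-check in isolation. The standalone sketch below reimplements the same arithmetic outside btrfs-progs; the profile enum and the sample chunk sizes are made up for illustration and are not part of the patch.

#include <assert.h>
#include <stdint.h>

enum profile { SINGLE, DUP, RAID1, RAID5, RAID6, RAID10 };

/* Same rule as get_dev_extent_len(): per-device extent length is the
 * chunk size divided by the number of data stripes per device copy. */
static uint64_t dev_extent_len(enum profile p, uint64_t chunk_size,
			       int num_stripes, int sub_stripes)
{
	int div = 1;

	switch (p) {
	case SINGLE:
	case DUP:
	case RAID1:
		div = 1;
		break;
	case RAID5:
		div = num_stripes - 1;	/* one stripe holds parity */
		break;
	case RAID6:
		div = num_stripes - 2;	/* two parity stripes */
		break;
	case RAID10:
		div = num_stripes / sub_stripes;
		break;
	}
	return chunk_size / div;
}

int main(void)
{
	/* 1 GiB RAID10 chunk, 4 stripes, 2 copies: 512 MiB per device */
	assert(dev_extent_len(RAID10, 1ULL << 30, 4, 2) == 1ULL << 29);
	/* 1 GiB RAID6 chunk over 6 devices: 4 data stripes, 256 MiB each */
	assert(dev_extent_len(RAID6, 1ULL << 30, 6, 0) == 1ULL << 28);
	return 0;
}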
+/* free block group/chunk related caches */
+static int free_block_group_cache(struct btrfs_trans_handle *trans,
+				  struct btrfs_fs_info *fs_info,
+				  u64 bytenr, u64 len)
+{
 	struct btrfs_block_group_cache *cache;
-	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct cache_extent *ce;
+	struct map_lookup *map;
+	int ret;
+	int i;
+	u64 flags;
 
-	btrfs_init_path(&path);
+	/* Free block group cache first */
+	cache = btrfs_lookup_block_group(fs_info, bytenr);
+	if (!cache)
+		return -ENOENT;
+	flags = cache->flags;
+	if (cache->free_space_ctl) {
+		btrfs_remove_free_space_cache(cache);
+		kfree(cache->free_space_ctl);
+	}
+	clear_extent_bits(&fs_info->block_group_cache, bytenr, bytenr + len - 1,
+			  (unsigned int)-1);
+	ret = free_space_info(fs_info, flags, len, 0, NULL);
+	if (ret < 0)
+		goto out;
+	kfree(cache);
 
-	while(1) {
-		cache = btrfs_lookup_block_group(fs_info, start);
-		if (!cache)
-			break;
+	/* Then free mapping info and dev usage info */
+	ce = search_cache_extent(&fs_info->mapping_tree.cache_tree, bytenr);
+	if (!ce || ce->start != bytenr) {
+		ret = -ENOENT;
+		goto out;
+	}
+	map = container_of(ce, struct map_lookup, ce);
+	for (i = 0; i < map->num_stripes; i++) {
+		struct btrfs_device *device;
-		ret = btrfs_count_extents_in_block_group(root, &path,
-						cache->key.objectid,
-						cache->key.offset,
-						&bytes_used);
+		device = map->stripes[i].dev;
+		device->bytes_used -= get_dev_extent_len(map);
+		ret = btrfs_update_device(trans, device);
+		if (ret < 0)
+			goto out;
+	}
+	remove_cache_extent(&fs_info->mapping_tree.cache_tree, ce);
+	free(map);
+out:
+	return ret;
+}
 
-		if (ret == 0) {
-			u64 on_disk = btrfs_block_group_used(&cache->item);
-			if (on_disk != bytes_used) {
-				fprintf(stderr, "bad block group accounting found %llu "
-					"expected %llu block group %llu\n",
-					(unsigned long long)bytes_used,
-					(unsigned long long)on_disk,
-					(unsigned long long)cache->key.objectid);
-			}
-		}
-		start = cache->key.objectid + cache->key.offset;
+int btrfs_free_block_group(struct btrfs_trans_handle *trans,
+			   struct btrfs_fs_info *fs_info, u64 bytenr, u64 len)
+{
+	struct btrfs_root *extent_root = fs_info->extent_root;
+	struct btrfs_path *path;
+	struct btrfs_block_group_item *bgi;
+	struct btrfs_key key;
+	int ret = 0;
 
-		cache->space_info->bytes_used = 0;
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = bytenr;
+	key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+	key.offset = len;
+
+	/* Double check the block group to ensure it's empty */
+	ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
+	if (ret > 0) {
+		ret = -ENOENT;
+		goto out;
 	}
-	return 0;
+	if (ret < 0)
+		goto out;
+
+	bgi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			     struct btrfs_block_group_item);
+	if (btrfs_disk_block_group_used(path->nodes[0], bgi)) {
+		fprintf(stderr,
+			"WARNING: block group [%llu,%llu) is not empty\n",
+			bytenr, bytenr + len);
+		ret = -EINVAL;
+		goto out;
+	}
+	btrfs_release_path(path);
+
+	/*
+	 * Now pin all space in the block group, to prevent later
+	 * transactions from allocating space out of it.
+	 * Every operation that needs a transaction must run while this
+	 * range stays pinned.
+	 */
+	btrfs_pin_extent(fs_info, bytenr, len);
+
+	/* delete block group item and chunk item */
+	ret = free_block_group_item(trans, fs_info, bytenr, len);
+	if (ret < 0) {
+		fprintf(stderr,
+			"failed to free block group item for [%llu,%llu)\n",
+			bytenr, bytenr + len);
+		btrfs_unpin_extent(fs_info, bytenr, len);
+		goto out;
+	}
+
+	ret = free_chunk_dev_extent_items(trans, fs_info, bytenr);
+	if (ret < 0) {
+		fprintf(stderr,
+			"failed to free dev extents belonging to [%llu,%llu)\n",
+			bytenr, bytenr + len);
+		btrfs_unpin_extent(fs_info, bytenr, len);
+		goto out;
+	}
+	ret = free_chunk_item(trans, fs_info, bytenr);
+	if (ret < 0) {
+		fprintf(stderr,
+			"failed to free chunk for [%llu,%llu)\n",
+			bytenr, bytenr + len);
+		btrfs_unpin_extent(fs_info, bytenr, len);
+		goto out;
+	}
+
+	/* Now release the block_group_cache */
+	ret = free_block_group_cache(trans, fs_info, bytenr, len);
+	btrfs_unpin_extent(fs_info, bytenr, len);
+
+out:
+	btrfs_free_path(path);
+	return ret;
 }
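For orientation, the sequence above (pin, drop the block group item, the dev extents and the chunk, then the in-memory caches, then unpin) is driven from a single transaction. A minimal caller sketch, assuming the ERR_PTR-returning variant of btrfs_start_transaction() and placeholder bytenr/len values; this helper is hypothetical and not part of the patch.

/* Hypothetical helper: remove one empty block group inside one transaction. */
static int remove_one_block_group(struct btrfs_fs_info *fs_info,
				  u64 bytenr, u64 len)
{
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_trans_handle *trans;
	int ret;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	/* Fails with -EINVAL if the block group still has bytes in use */
	ret = btrfs_free_block_group(trans, fs_info, bytenr, len);

	btrfs_commit_transaction(trans, root);
	return ret;
}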
 
 /*
@@ -3389,7 +3839,7 @@ int btrfs_check_block_accounting(struct btrfs_root *root)
 int btrfs_fix_block_accounting(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root)
 {
-	int ret;
+	int ret = 0;
 	int slot;
 	u64 start = 0;
 	u64 bytes_used = 0;
@@ -3411,7 +3861,7 @@ int btrfs_fix_block_accounting(struct btrfs_trans_handle *trans,
 	}
 	while(1) {
-		cache = btrfs_lookup_block_group(fs_info, start);
+		cache = btrfs_lookup_first_block_group(fs_info, start);
 		if (!cache)
 			break;
 		start = cache->key.objectid + cache->key.offset;
@@ -3420,13 +3870,13 @@ int btrfs_fix_block_accounting(struct btrfs_trans_handle *trans,
 		set_extent_bits(&root->fs_info->block_group_cache,
 				cache->key.objectid,
 				cache->key.objectid + cache->key.offset -1,
-				BLOCK_GROUP_DIRTY, GFP_NOFS);
+				BLOCK_GROUP_DIRTY);
 	}
 
 	btrfs_init_path(&path);
 	key.offset = 0;
 	key.objectid = 0;
-	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
+	key.type = BTRFS_EXTENT_ITEM_KEY;
 	ret = btrfs_search_slot(trans, root->fs_info->extent_root,
 				&key, &path, 0, 0);
 	if (ret < 0)
@@ -3446,13 +3896,361 @@ int btrfs_fix_block_accounting(struct btrfs_trans_handle *trans,
 		btrfs_item_key_to_cpu(leaf, &key, slot);
 		if (key.type == BTRFS_EXTENT_ITEM_KEY) {
 			bytes_used += key.offset;
-			ret = btrfs_update_block_group(trans, root,
+			ret = btrfs_update_block_group(root,
 				  key.objectid, key.offset, 1, 0);
 			BUG_ON(ret);
+		} else if (key.type == BTRFS_METADATA_ITEM_KEY) {
+			bytes_used += fs_info->nodesize;
+			ret = btrfs_update_block_group(root,
+					key.objectid, fs_info->nodesize, 1, 0);
+			if (ret)
+				goto out;
 		}
 		path.slots[0]++;
 	}
-	btrfs_set_super_bytes_used(&root->fs_info->super_copy, bytes_used);
-	btrfs_release_path(root, &path);
+	btrfs_set_super_bytes_used(root->fs_info->super_copy, bytes_used);
+	ret = 0;
+out:
+	btrfs_release_path(&path);
+	return ret;
+}
+
+static void __get_extent_size(struct btrfs_root *root, struct btrfs_path *path,
+			      u64 *start, u64 *len)
+{
+	struct btrfs_key key;
+
+	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+	BUG_ON(!(key.type == BTRFS_EXTENT_ITEM_KEY ||
+		 key.type == BTRFS_METADATA_ITEM_KEY));
+	*start = key.objectid;
+	if (key.type == BTRFS_EXTENT_ITEM_KEY)
+		*len = key.offset;
+	else
+		*len = root->fs_info->nodesize;
+}
+
+/*
+ * Find the first extent that overlaps the range [bytenr, bytenr + len)
+ * Return 0 if found, and point the path at it.
+ * Return >0 if not found.
+ * Return <0 on error
+ */
+int btrfs_search_overlap_extent(struct btrfs_root *root,
+				struct btrfs_path *path, u64 bytenr, u64 len)
+{
+	struct btrfs_key key;
+	u64 cur_start;
+	u64 cur_len;
+	int ret;
+
+	key.objectid = bytenr;
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	key.offset = (u64)-1;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		return ret;
+	BUG_ON(ret == 0);
+
+	ret = btrfs_previous_extent_item(root, path, 0);
+	if (ret < 0)
+		return ret;
+	/* no previous, check next extent */
+	if (ret > 0)
+		goto next;
+	__get_extent_size(root, path, &cur_start, &cur_len);
+	/* Tail overlap */
+	if (cur_start + cur_len > bytenr)
+		return 1;
+
+next:
+	ret = btrfs_next_extent_item(root, path, bytenr + len);
+	if (ret < 0)
+		return ret;
+	/* No next, prev already checked, no overlap */
+	if (ret > 0)
+		return 0;
+	__get_extent_size(root, path, &cur_start, &cur_len);
+	/* Head overlap */
+	if (cur_start < bytenr + len)
+		return 1;
+	return 0;
+}
+
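The search above only ever has to inspect two items: the extent item just before the start of the range (tail overlap) and the one just after (head overlap). The standalone sketch below restates the half-open interval test it relies on; it is illustrative only and uses made-up byte ranges.

#include <assert.h>
#include <stdint.h>

/* Half-open intervals [a_start, a_start + a_len) and [b_start, b_start + b_len)
 * overlap exactly when each one starts before the other ends. */
static int ranges_overlap(uint64_t a_start, uint64_t a_len,
			  uint64_t b_start, uint64_t b_len)
{
	return a_start < b_start + b_len && b_start < a_start + a_len;
}

int main(void)
{
	/* tail overlap: existing extent [0, 8) vs new range [4, 12) */
	assert(ranges_overlap(0, 8, 4, 8));
	/* adjacent, not overlapping: existing extent [12, 20) vs new range [4, 12) */
	assert(!ranges_overlap(12, 8, 4, 8));
	return 0;
}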
+static int __btrfs_record_file_extent(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *root, u64 objectid,
+				      struct btrfs_inode_item *inode,
+				      u64 file_pos, u64 disk_bytenr,
+				      u64 *ret_num_bytes)
+{
+	int ret;
+	struct btrfs_fs_info *info = root->fs_info;
+	struct btrfs_root *extent_root = info->extent_root;
+	struct extent_buffer *leaf;
+	struct btrfs_file_extent_item *fi;
+	struct btrfs_key ins_key;
+	struct btrfs_path *path;
+	struct btrfs_extent_item *ei;
+	u64 nbytes;
+	u64 extent_num_bytes;
+	u64 extent_bytenr;
+	u64 extent_offset;
+	u64 num_bytes = *ret_num_bytes;
+
+	/*
+	 * No supported file system uses a zero disk_bytenr for a real
+	 * extent: it marks a hole.
+	 *
+	 * And a hole extent has no size limit, so there is no need to loop.
+	 */
+	if (disk_bytenr == 0) {
+		ret = btrfs_insert_file_extent(trans, root, objectid,
+					       file_pos, disk_bytenr,
+					       num_bytes, num_bytes);
+		return ret;
+	}
+	num_bytes = min_t(u64, num_bytes, BTRFS_MAX_EXTENT_SIZE);
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	/* First, check for extent overlap */
+	ret = btrfs_search_overlap_extent(extent_root, path, disk_bytenr,
+					  num_bytes);
+	if (ret < 0)
+		goto fail;
+	if (ret > 0) {
+		/* Found overlap */
+		u64 cur_start;
+		u64 cur_len;
+
+		__get_extent_size(extent_root, path, &cur_start, &cur_len);
+		/*
+		 * For the convert case, this extent should be a subset of
+		 * an existing one.
+		 */
+		BUG_ON(disk_bytenr < cur_start);
+
+		extent_bytenr = cur_start;
+		extent_num_bytes = cur_len;
+		extent_offset = disk_bytenr - extent_bytenr;
+	} else {
+		/* No overlap, create new extent */
+		btrfs_release_path(path);
+		ins_key.objectid = disk_bytenr;
+		ins_key.offset = num_bytes;
+		ins_key.type = BTRFS_EXTENT_ITEM_KEY;
+
+		ret = btrfs_insert_empty_item(trans, extent_root, path,
+					      &ins_key, sizeof(*ei));
+		if (ret == 0) {
+			leaf = path->nodes[0];
+			ei = btrfs_item_ptr(leaf, path->slots[0],
+					    struct btrfs_extent_item);
+
+			btrfs_set_extent_refs(leaf, ei, 0);
+			btrfs_set_extent_generation(leaf, ei, 0);
+			btrfs_set_extent_flags(leaf, ei,
+					       BTRFS_EXTENT_FLAG_DATA);
+			btrfs_mark_buffer_dirty(leaf);
+
+			ret = btrfs_update_block_group(root, disk_bytenr,
+						       num_bytes, 1, 0);
+			if (ret)
+				goto fail;
+		} else if (ret != -EEXIST) {
+			goto fail;
+		}
+		btrfs_extent_post_op(trans, extent_root);
+		extent_bytenr = disk_bytenr;
+		extent_num_bytes = num_bytes;
+		extent_offset = 0;
+	}
+	btrfs_release_path(path);
+	ins_key.objectid = objectid;
+	ins_key.offset = file_pos;
+	ins_key.type = BTRFS_EXTENT_DATA_KEY;
+	ret = btrfs_insert_empty_item(trans, root, path, &ins_key,
+				      sizeof(*fi));
+	if (ret)
+		goto fail;
+	leaf = path->nodes[0];
+	fi = btrfs_item_ptr(leaf, path->slots[0],
+			    struct btrfs_file_extent_item);
+	btrfs_set_file_extent_generation(leaf, fi, trans->transid);
+	btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
+	btrfs_set_file_extent_disk_bytenr(leaf, fi, extent_bytenr);
+	btrfs_set_file_extent_disk_num_bytes(leaf, fi, extent_num_bytes);
+	btrfs_set_file_extent_offset(leaf, fi, extent_offset);
+	btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
+	btrfs_set_file_extent_ram_bytes(leaf, fi, extent_num_bytes);
+	btrfs_set_file_extent_compression(leaf, fi, 0);
+	btrfs_set_file_extent_encryption(leaf, fi, 0);
+	btrfs_set_file_extent_other_encoding(leaf, fi, 0);
+	btrfs_mark_buffer_dirty(leaf);
+
+	nbytes = btrfs_stack_inode_nbytes(inode) + num_bytes;
+	btrfs_set_stack_inode_nbytes(inode, nbytes);
+	btrfs_release_path(path);
+
+	ret = btrfs_inc_extent_ref(trans, root, extent_bytenr, extent_num_bytes,
+				   0, root->root_key.objectid, objectid,
+				   file_pos - extent_offset);
+	if (ret)
+		goto fail;
+	ret = 0;
+	*ret_num_bytes = min(extent_num_bytes - extent_offset, num_bytes);
+fail:
+	btrfs_free_path(path);
+	return ret;
+}
+
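The clamping at the end of the function above decides how much of the caller's request is actually recorded in one pass: when the range lands inside an existing extent, only the part from extent_offset to the end of that extent can be used, and the caller loops for the rest. A quick numeric check with made-up byte values:

#include <assert.h>
#include <stdint.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
	/* Existing extent item: [extent_bytenr, extent_bytenr + extent_num_bytes) */
	uint64_t extent_bytenr = 1048576;	/* 1 MiB, made-up */
	uint64_t extent_num_bytes = 262144;	/* 256 KiB */

	/* Caller wants to record 200 KiB starting 64 KiB into that extent */
	uint64_t disk_bytenr = extent_bytenr + 65536;
	uint64_t num_bytes = 204800;

	uint64_t extent_offset = disk_bytenr - extent_bytenr;
	/* Only the part of the existing extent past extent_offset is usable */
	uint64_t recorded = MIN(extent_num_bytes - extent_offset, num_bytes);

	assert(extent_offset == 65536);
	assert(recorded == 196608);	/* 192 KiB; the last 8 KiB needs another pass */
	return 0;
}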
+/*
+ * Record a file extent. Do all the required work, such as inserting the
+ * file extent item, inserting the extent item and backref item into the
+ * extent tree, and updating block accounting.
+ */ +int btrfs_record_file_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 objectid, + struct btrfs_inode_item *inode, + u64 file_pos, u64 disk_bytenr, + u64 num_bytes) +{ + u64 cur_disk_bytenr = disk_bytenr; + u64 cur_file_pos = file_pos; + u64 cur_num_bytes = num_bytes; + int ret = 0; + + while (num_bytes > 0) { + ret = __btrfs_record_file_extent(trans, root, objectid, + inode, cur_file_pos, + cur_disk_bytenr, + &cur_num_bytes); + if (ret < 0) + break; + cur_disk_bytenr += cur_num_bytes; + cur_file_pos += cur_num_bytes; + num_bytes -= cur_num_bytes; + } + return ret; +} + + +static int add_excluded_extent(struct btrfs_root *root, + u64 start, u64 num_bytes) +{ + u64 end = start + num_bytes - 1; + set_extent_bits(&root->fs_info->pinned_extents, + start, end, EXTENT_UPTODATE); + return 0; +} + +void free_excluded_extents(struct btrfs_root *root, + struct btrfs_block_group_cache *cache) +{ + u64 start, end; + + start = cache->key.objectid; + end = start + cache->key.offset - 1; + + clear_extent_bits(&root->fs_info->pinned_extents, + start, end, EXTENT_UPTODATE); +} + +int exclude_super_stripes(struct btrfs_root *root, + struct btrfs_block_group_cache *cache) +{ + u64 bytenr; + u64 *logical; + int stripe_len; + int i, nr, ret; + + if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) { + stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid; + cache->bytes_super += stripe_len; + ret = add_excluded_extent(root, cache->key.objectid, + stripe_len); + if (ret) + return ret; + } + + for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { + bytenr = btrfs_sb_offset(i); + ret = btrfs_rmap_block(root->fs_info, + cache->key.objectid, bytenr, + 0, &logical, &nr, &stripe_len); + if (ret) + return ret; + + while (nr--) { + u64 start, len; + + if (logical[nr] > cache->key.objectid + + cache->key.offset) + continue; + + if (logical[nr] + stripe_len <= cache->key.objectid) + continue; + + start = logical[nr]; + if (start < cache->key.objectid) { + start = cache->key.objectid; + len = (logical[nr] + stripe_len) - start; + } else { + len = min_t(u64, stripe_len, + cache->key.objectid + + cache->key.offset - start); + } + + cache->bytes_super += len; + ret = add_excluded_extent(root, start, len); + if (ret) { + kfree(logical); + return ret; + } + } + + kfree(logical); + } return 0; } + +u64 add_new_free_space(struct btrfs_block_group_cache *block_group, + struct btrfs_fs_info *info, u64 start, u64 end) +{ + u64 extent_start, extent_end, size, total_added = 0; + int ret; + + while (start < end) { + ret = find_first_extent_bit(&info->pinned_extents, start, + &extent_start, &extent_end, + EXTENT_DIRTY | EXTENT_UPTODATE); + if (ret) + break; + + if (extent_start <= start) { + start = extent_end + 1; + } else if (extent_start > start && extent_start < end) { + size = extent_start - start; + total_added += size; + ret = btrfs_add_free_space(block_group->free_space_ctl, + start, size); + BUG_ON(ret); /* -ENOMEM or logic error */ + start = extent_end + 1; + } else { + break; + } + } + + if (start < end) { + size = end - start; + total_added += size; + ret = btrfs_add_free_space(block_group->free_space_ctl, start, + size); + BUG_ON(ret); /* -ENOMEM or logic error */ + } + + return total_added; +}
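add_new_free_space() above walks the pinned/excluded ranges inside a block group and hands every remaining gap to the free-space cache. The standalone model below reproduces that gap walk with half-open ranges over a plain sorted array instead of the extent-bit tree; the ranges and totals are toy values for illustration only.

#include <assert.h>
#include <stdint.h>

struct range { uint64_t start, end; };	/* half-open: [start, end) */

/* Sum the free bytes of [start, end) that are not covered by the sorted
 * excluded ranges, mirroring the gap walk in add_new_free_space(). */
static uint64_t count_free(uint64_t start, uint64_t end,
			   const struct range *excl, int nr)
{
	uint64_t total = 0;
	int i;

	for (i = 0; i < nr && start < end; i++) {
		if (excl[i].end <= start)
			continue;		/* excluded range is behind us */
		if (excl[i].start >= end)
			break;			/* past the block group */
		if (excl[i].start > start)
			total += excl[i].start - start;	/* gap before it */
		start = excl[i].end;		/* skip the excluded range */
	}
	if (start < end)
		total += end - start;		/* trailing gap */
	return total;
}

int main(void)
{
	/* block group [0, 100) with two excluded ranges [10,20) and [50,60) */
	struct range excl[] = { { 10, 20 }, { 50, 60 } };

	/* free space: 10 + 30 + 40 = 80 */
	assert(count_free(0, 100, excl, 2) == 80);
	return 0;
}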