X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=extent-tree.c;h=e2ae74a7fe66bfbecb64bf27bf7d6994666135fa;hb=58fae8ff58b973737326882ca1f5a410a70f1f03;hp=797945732c84c9bcfdad05ecd32e4acdcbd35f8d;hpb=8d2359c8d9419be10e3af75f426e984df3d2e020;p=platform%2Fupstream%2Fbtrfs-progs.git diff --git a/extent-tree.c b/extent-tree.c index 7979457..e2ae74a 100644 --- a/extent-tree.c +++ b/extent-tree.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "kerncompat.h" #include "radix-tree.h" #include "ctree.h" @@ -28,7 +29,7 @@ #include "crc32c.h" #include "volumes.h" #include "free-space-cache.h" -#include "math.h" +#include "utils.h" #define PENDING_EXTENT_INSERT 0 #define PENDING_EXTENT_DELETE 1 @@ -68,18 +69,18 @@ static int remove_sb_from_cache(struct btrfs_root *root, u64 *logical; int stripe_len; int i, nr, ret; + struct btrfs_fs_info *fs_info = root->fs_info; struct extent_io_tree *free_space_cache; - free_space_cache = &root->fs_info->free_space_cache; + free_space_cache = &fs_info->free_space_cache; for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { bytenr = btrfs_sb_offset(i); - ret = btrfs_rmap_block(&root->fs_info->mapping_tree, - cache->key.objectid, bytenr, 0, + ret = btrfs_rmap_block(fs_info, cache->key.objectid, bytenr, 0, &logical, &nr, &stripe_len); BUG_ON(ret); while (nr--) { clear_extent_dirty(free_space_cache, logical[nr], - logical[nr] + stripe_len - 1, GFP_NOFS); + logical[nr] + stripe_len - 1); } kfree(logical); } @@ -148,11 +149,10 @@ static int cache_block_group(struct btrfs_root *root, if (key.objectid > last) { hole_size = key.objectid - last; set_extent_dirty(free_space_cache, last, - last + hole_size - 1, - GFP_NOFS); + last + hole_size - 1); } if (key.type == BTRFS_METADATA_ITEM_KEY) - last = key.objectid + root->leafsize; + last = key.objectid + root->fs_info->nodesize; else last = key.objectid + key.offset; } @@ -164,8 +164,7 @@ next: block_group->key.offset > last) { hole_size = block_group->key.objectid + block_group->key.offset - last; - set_extent_dirty(free_space_cache, last, - last + hole_size - 1, GFP_NOFS); + set_extent_dirty(free_space_cache, last, last + hole_size - 1); } remove_sb_from_cache(root, block_group); block_group->cached = 1; @@ -285,7 +284,7 @@ out: if (!cache) { printk("Unable to find block group for %llu\n", (unsigned long long)search_start); - WARN_ON(1); + return -ENOENT; } return -ENOSPC; @@ -573,7 +572,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans, return ret; BUG_ON(ret); - ret = btrfs_extend_item(trans, root, path, new_size); + ret = btrfs_extend_item(root, path, new_size); BUG_ON(ret); leaf = path->nodes[0]; @@ -597,7 +596,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans, } #endif -static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset) +u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset) { u32 high_crc = ~(u32)0; u32 low_crc = ~(u32)0; @@ -856,8 +855,7 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, return ret; } -static noinline u32 extent_data_ref_count(struct btrfs_root *root, - struct btrfs_path *path, +static noinline u32 extent_data_ref_count(struct btrfs_path *path, struct btrfs_extent_inline_ref *iref) { struct btrfs_key key; @@ -972,27 +970,6 @@ static inline int extent_ref_type(u64 parent, u64 owner) return type; } -static int find_next_key(struct btrfs_path *path, struct btrfs_key *key) - -{ - int level; - for (level = 0; level < BTRFS_MAX_LEVEL; level++) { - if (!path->nodes[level]) - break; - if (path->slots[level] + 1 >= - btrfs_header_nritems(path->nodes[level])) - continue; - if (level == 0) - btrfs_item_key_to_cpu(path->nodes[level], key, - path->slots[level] + 1); - else - btrfs_node_key_to_cpu(path->nodes[level], key, - path->slots[level] + 1); - return 0; - } - return 1; -} - static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, @@ -1015,8 +992,7 @@ static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, int ret; int err = 0; int skinny_metadata = - btrfs_fs_incompat(root->fs_info, - BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA); + btrfs_fs_incompat(root->fs_info, SKINNY_METADATA); key.objectid = bytenr; key.type = BTRFS_EXTENT_ITEM_KEY; @@ -1029,7 +1005,6 @@ static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, extra_size = -1; if (owner < BTRFS_FIRST_FREE_OBJECTID && skinny_metadata) { - skinny_metadata = 1; key.type = BTRFS_METADATA_ITEM_KEY; key.offset = owner; } else if (skinny_metadata) { @@ -1185,8 +1160,7 @@ out: return err; } -static int setup_inline_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int setup_inline_extent_backref(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_extent_inline_ref *iref, u64 parent, u64 root_objectid, @@ -1209,7 +1183,7 @@ static int setup_inline_extent_backref(struct btrfs_trans_handle *trans, type = extent_ref_type(parent, owner); size = btrfs_extent_inline_ref_size(type); - ret = btrfs_extend_item(trans, root, path, size); + ret = btrfs_extend_item(root, path, size); BUG_ON(ret); ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); @@ -1329,7 +1303,7 @@ static int update_inline_extent_backref(struct btrfs_trans_handle *trans, memmove_extent_buffer(leaf, ptr, ptr + size, end - ptr - size); item_size -= size; - ret = btrfs_truncate_item(trans, root, path, item_size, 1); + ret = btrfs_truncate_item(root, path, item_size, 1); BUG_ON(ret); } btrfs_mark_buffer_dirty(leaf); @@ -1354,7 +1328,7 @@ static int insert_inline_extent_backref(struct btrfs_trans_handle *trans, ret = update_inline_extent_backref(trans, root, path, iref, refs_to_add); } else if (ret == -ENOENT) { - ret = setup_inline_extent_backref(trans, root, path, iref, + ret = setup_inline_extent_backref(root, path, iref, parent, root_objectid, owner, offset, refs_to_add); } @@ -1418,7 +1392,6 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, return -ENOMEM; path->reada = 1; - path->leave_spinning = 1; ret = insert_inline_extent_backref(trans, root->fs_info->extent_root, path, bytenr, num_bytes, parent, @@ -1440,7 +1413,6 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_release_path(path); path->reada = 1; - path->leave_spinning = 1; /* now insert the actual backref */ ret = insert_extent_backref(trans, root->fs_info->extent_root, @@ -1478,9 +1450,8 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, u64 extent_flags; if (metadata && - !btrfs_fs_incompat(root->fs_info, - BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)) { - offset = root->leafsize; + !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) { + offset = root->fs_info->nodesize; metadata = 0; } @@ -1515,14 +1486,14 @@ again: path->slots[0]); if (key.objectid == bytenr && key.type == BTRFS_EXTENT_ITEM_KEY && - key.offset == root->leafsize) + key.offset == root->fs_info->nodesize) ret = 0; } if (ret) { btrfs_release_path(path); key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = root->leafsize; + key.offset = root->fs_info->nodesize; metadata = 0; goto again; } @@ -1574,8 +1545,7 @@ int btrfs_set_block_flags(struct btrfs_trans_handle *trans, struct btrfs_extent_item *item; u32 item_size; int skinny_metadata = - btrfs_fs_incompat(root->fs_info, - BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA); + btrfs_fs_incompat(root->fs_info, SKINNY_METADATA); path = btrfs_alloc_path(); if (!path) @@ -1587,7 +1557,7 @@ int btrfs_set_block_flags(struct btrfs_trans_handle *trans, key.offset = level; key.type = BTRFS_METADATA_ITEM_KEY; } else { - key.offset = root->leafsize; + key.offset = root->fs_info->nodesize; key.type = BTRFS_EXTENT_ITEM_KEY; } @@ -1604,13 +1574,13 @@ again: btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); if (key.objectid == bytenr && - key.offset == root->leafsize && + key.offset == root->fs_info->nodesize && key.type == BTRFS_EXTENT_ITEM_KEY) ret = 0; } if (ret) { btrfs_release_path(path); - key.offset = root->leafsize; + key.offset = root->fs_info->nodesize; key.type = BTRFS_EXTENT_ITEM_KEY; goto again; } @@ -1686,7 +1656,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, cond_resched(); if (level == 0) { btrfs_item_key_to_cpu(buf, &key, i); - if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) + if (key.type != BTRFS_EXTENT_DATA_KEY) continue; fi = btrfs_item_ptr(buf, i, struct btrfs_file_extent_item); @@ -1708,7 +1678,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, } } else { bytenr = btrfs_node_blockptr(buf, i); - num_bytes = btrfs_level_size(root, level - 1); + num_bytes = root->fs_info->nodesize; ret = process_func(trans, root, bytenr, num_bytes, parent, ref_root, level - 1, 0); if (ret) { @@ -1799,7 +1769,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, BUG_ON(ret); clear_extent_bits(block_group_cache, start, end, - BLOCK_GROUP_DIRTY, GFP_NOFS); + BLOCK_GROUP_DIRTY); cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; ret = write_one_cache_group(trans, root, path, cache); @@ -1811,11 +1781,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, u64 flags) { - struct list_head *head = &info->space_info; - struct list_head *cur; struct btrfs_space_info *found; - list_for_each(cur, head) { - found = list_entry(cur, struct btrfs_space_info, list); + + flags &= BTRFS_BLOCK_GROUP_TYPE_MASK; + + list_for_each_entry(found, &info->space_info, list) { if (found->flags & flags) return found; } @@ -1823,6 +1793,31 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, } +static int free_space_info(struct btrfs_fs_info *fs_info, u64 flags, + u64 total_bytes, u64 bytes_used, + struct btrfs_space_info **space_info) +{ + struct btrfs_space_info *found; + + /* only support free block group which is empty */ + if (bytes_used) + return -ENOTEMPTY; + + found = __find_space_info(fs_info, flags); + if (!found) + return -ENOENT; + if (found->total_bytes < total_bytes) { + fprintf(stderr, + "WARNING: bad space info to free %llu only have %llu\n", + total_bytes, found->total_bytes); + return -EINVAL; + } + found->total_bytes -= total_bytes; + if (space_info) + *space_info = found; + return 0; +} + static int update_space_info(struct btrfs_fs_info *info, u64 flags, u64 total_bytes, u64 bytes_used, struct btrfs_space_info **space_info) @@ -1847,7 +1842,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, return -ENOMEM; list_add(&found->list, &info->space_info); - found->flags = flags; + found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; found->total_bytes = total_bytes; found->bytes_used = bytes_used; found->bytes_pinned = 0; @@ -1876,7 +1871,7 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) } static int do_chunk_alloc(struct btrfs_trans_handle *trans, - struct btrfs_root *extent_root, u64 alloc_bytes, + struct btrfs_fs_info *fs_info, u64 alloc_bytes, u64 flags) { struct btrfs_space_info *space_info; @@ -1885,10 +1880,9 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 num_bytes; int ret; - space_info = __find_space_info(extent_root->fs_info, flags); + space_info = __find_space_info(fs_info, flags); if (!space_info) { - ret = update_space_info(extent_root->fs_info, flags, - 0, 0, &space_info); + ret = update_space_info(fs_info, flags, 0, 0, &space_info); BUG_ON(ret); } BUG_ON(!space_info); @@ -1901,7 +1895,17 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, thresh) return 0; - ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, + /* + * Avoid allocating given chunk type + */ + if (fs_info->avoid_meta_chunk_alloc && + (flags & BTRFS_BLOCK_GROUP_METADATA)) + return 0; + if (fs_info->avoid_sys_chunk_alloc && + (flags & BTRFS_BLOCK_GROUP_SYSTEM)) + return 0; + + ret = btrfs_alloc_chunk(trans, fs_info, &start, &num_bytes, space_info->flags); if (ret == -ENOSPC) { space_info->full = 1; @@ -1910,14 +1914,13 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, BUG_ON(ret); - ret = btrfs_make_block_group(trans, extent_root, 0, space_info->flags, - BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes); + ret = btrfs_make_block_group(trans, fs_info, 0, space_info->flags, + start, num_bytes); BUG_ON(ret); return 0; } -static int update_block_group(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int update_block_group(struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc, int mark_free) { @@ -1955,7 +1958,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, start = cache->key.objectid; end = start + cache->key.offset - 1; set_extent_bits(&info->block_group_cache, start, end, - BLOCK_GROUP_DIRTY, GFP_NOFS); + BLOCK_GROUP_DIRTY); old_val = btrfs_block_group_used(&cache->item); num_bytes = min(total, cache->key.offset - byte_in_group); @@ -1968,8 +1971,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, cache->space_info->bytes_used -= num_bytes; if (mark_free) { set_extent_dirty(&info->free_space_cache, - bytenr, bytenr + num_bytes - 1, - GFP_NOFS); + bytenr, bytenr + num_bytes - 1); } } btrfs_set_block_group_used(&cache->item, old_val); @@ -1988,15 +1990,15 @@ static int update_pinned_extents(struct btrfs_root *root, if (pin) { set_extent_dirty(&fs_info->pinned_extents, - bytenr, bytenr + num - 1, GFP_NOFS); + bytenr, bytenr + num - 1); } else { clear_extent_dirty(&fs_info->pinned_extents, - bytenr, bytenr + num - 1, GFP_NOFS); + bytenr, bytenr + num - 1); } while (num > 0) { cache = btrfs_lookup_block_group(fs_info, bytenr); if (!cache) { - len = min((u64)root->sectorsize, num); + len = min((u64)fs_info->sectorsize, num); goto next; } WARN_ON(!cache); @@ -2034,8 +2036,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, if (ret) break; update_pinned_extents(root, start, end + 1 - start, 0); - clear_extent_dirty(unpin, start, end, GFP_NOFS); - set_extent_dirty(free_space_cache, start, end, GFP_NOFS); + clear_extent_dirty(unpin, start, end); + set_extent_dirty(free_space_cache, start, end); } return 0; } @@ -2066,8 +2068,7 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_key key; int ret; int skinny_metadata = - btrfs_fs_incompat(extent_root->fs_info, - BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA); + btrfs_fs_incompat(extent_root->fs_info, SKINNY_METADATA); while(1) { ret = find_first_extent_bit(&info->extent_ins, 0, &start, @@ -2099,8 +2100,7 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, BUG_ON(1); } - clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, - GFP_NOFS); + clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED); kfree(extent_op); } return 0; @@ -2116,7 +2116,7 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, if (is_data) goto pinit; - buf = btrfs_find_tree_block(root, bytenr, num_bytes); + buf = btrfs_find_tree_block(root->fs_info, bytenr, num_bytes); if (!buf) goto pinit; @@ -2168,7 +2168,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_key key; struct btrfs_path *path; - struct btrfs_extent_ops *ops = root->fs_info->extent_ops; struct btrfs_root *extent_root = root->fs_info->extent_root; struct extent_buffer *leaf; struct btrfs_extent_item *ei; @@ -2181,8 +2180,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, u32 item_size; u64 refs; int skinny_metadata = - btrfs_fs_incompat(extent_root->fs_info, - BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA); + btrfs_fs_incompat(extent_root->fs_info, SKINNY_METADATA); if (root->fs_info->free_extent_hook) { root->fs_info->free_extent_hook(trans, root, bytenr, num_bytes, @@ -2195,7 +2193,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, return -ENOMEM; path->reada = 1; - path->leave_spinning = 1; is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID; if (is_data) @@ -2239,7 +2236,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, is_data); BUG_ON(ret); btrfs_release_path(path); - path->leave_spinning = 1; key.objectid = bytenr; @@ -2304,7 +2300,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, BUG_ON(ret < 0); btrfs_release_path(path); - path->leave_spinning = 1; key.objectid = bytenr; key.type = BTRFS_EXTENT_ITEM_KEY; @@ -2362,7 +2357,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, if (found_extent) { BUG_ON(is_data && refs_to_drop != - extent_data_ref_count(root, path, iref)); + extent_data_ref_count(path, iref)); if (iref) { BUG_ON(path->slots[0] != extent_slot); } else { @@ -2372,14 +2367,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, } } - if (ops && ops->free_extent) { - ret = ops->free_extent(root, bytenr, num_bytes); - if (ret > 0) { - pin = 0; - mark_free = 0; - } - } - if (pin) { ret = pin_down_bytes(trans, root, bytenr, num_bytes, is_data); @@ -2398,7 +2385,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, BUG_ON(ret); } - update_block_group(trans, root, bytenr, num_bytes, 0, mark_free); + update_block_group(root, bytenr, num_bytes, 0, mark_free); } fail: btrfs_free_path(path); @@ -2435,8 +2422,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct BUG_ON(ret); extent_op = (struct pending_extent_op *)(unsigned long)priv; - clear_extent_bits(pending_del, start, end, EXTENT_LOCKED, - GFP_NOFS); + clear_extent_bits(pending_del, start, end, EXTENT_LOCKED); if (!test_range_bit(extent_ins, start, end, EXTENT_LOCKED, 0)) { @@ -2453,7 +2439,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct (unsigned long)priv; clear_extent_bits(extent_ins, start, end, - EXTENT_LOCKED, GFP_NOFS); + EXTENT_LOCKED); if (extent_op->type == PENDING_BACKREF_UPDATE) BUG_ON(1); @@ -2466,6 +2452,17 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct return err; } + +int btrfs_free_tree_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, + u64 parent, int last_ref) +{ + return btrfs_free_extent(trans, root, buf->start, buf->len, parent, + root->root_key.objectid, + btrfs_header_level(buf), 0); +} + /* * remove an extent from the root, returns 0 on success */ @@ -2479,7 +2476,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, int pending_ret; int ret; - WARN_ON(num_bytes < root->sectorsize); + WARN_ON(num_bytes < root->fs_info->sectorsize); if (root == extent_root) { struct pending_extent_op *extent_op; @@ -2493,7 +2490,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, set_extent_bits(&root->fs_info->pending_del, bytenr, bytenr + num_bytes - 1, - EXTENT_LOCKED, GFP_NOFS); + EXTENT_LOCKED); set_state_private(&root->fs_info->pending_del, bytenr, (unsigned long)extent_op); return 0; @@ -2506,9 +2503,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, static u64 stripe_align(struct btrfs_root *root, u64 val) { - u64 mask = ((u64)root->stripesize - 1); - u64 ret = (val + mask) & ~mask; - return ret; + return round_up(val, (u64)root->fs_info->stripesize); } /* @@ -2536,8 +2531,8 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, int full_scan = 0; int wrapped = 0; - WARN_ON(num_bytes < root->sectorsize); - btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); + WARN_ON(num_bytes < info->sectorsize); + ins->type = BTRFS_EXTENT_ITEM_KEY; search_start = stripe_align(root, search_start); @@ -2591,6 +2586,13 @@ check_failed: goto new_group; } + if (info->excluded_extents && + test_range_bit(info->excluded_extents, ins->objectid, + ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) { + search_start = ins->objectid + num_bytes; + goto new_group; + } + if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start && ins->objectid < exclude_start + exclude_nr)) { search_start = exclude_start + exclude_nr; @@ -2598,6 +2600,21 @@ check_failed: } if (!(data & BTRFS_BLOCK_GROUP_DATA)) { + if (check_crossing_stripes(info, ins->objectid, num_bytes)) { + struct btrfs_block_group_cache *bg_cache; + u64 bg_offset; + + bg_cache = btrfs_lookup_block_group(info, ins->objectid); + if (!bg_cache) + goto no_bg_cache; + bg_offset = ins->objectid - bg_cache->key.objectid; + + search_start = round_up( + bg_offset + num_bytes, BTRFS_STRIPE_LEN) + + bg_cache->key.objectid; + goto new_group; + } +no_bg_cache: block_group = btrfs_lookup_block_group(info, ins->objectid); if (block_group) trans->block_group = block_group; @@ -2633,57 +2650,49 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_bytes, u64 empty_size, u64 hint_byte, u64 search_end, - struct btrfs_key *ins, int data) + struct btrfs_key *ins, bool is_data) { int ret; u64 search_start = 0; u64 alloc_profile; + u64 profile; struct btrfs_fs_info *info = root->fs_info; - if (info->extent_ops) { - struct btrfs_extent_ops *ops = info->extent_ops; - ret = ops->alloc_extent(root, num_bytes, hint_byte, ins); - BUG_ON(ret); - goto found; - } - - if (data) { + if (is_data) { alloc_profile = info->avail_data_alloc_bits & info->data_alloc_profile; - data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; - } else if ((info->system_allocs > 0 || root == info->chunk_root) && - info->system_allocs >= 0) { + profile = BTRFS_BLOCK_GROUP_DATA | alloc_profile; + } else if (info->system_allocs == 1 || root == info->chunk_root) { alloc_profile = info->avail_system_alloc_bits & info->system_alloc_profile; - data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; + profile = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; } else { alloc_profile = info->avail_metadata_alloc_bits & info->metadata_alloc_profile; - data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; + profile = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; } if (root->ref_cows) { - if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { - ret = do_chunk_alloc(trans, root->fs_info->extent_root, + if (!(profile & BTRFS_BLOCK_GROUP_METADATA)) { + ret = do_chunk_alloc(trans, info, num_bytes, BTRFS_BLOCK_GROUP_METADATA); BUG_ON(ret); } - ret = do_chunk_alloc(trans, root->fs_info->extent_root, - num_bytes + 2 * 1024 * 1024, data); + ret = do_chunk_alloc(trans, info, + num_bytes + SZ_2M, profile); BUG_ON(ret); } - WARN_ON(num_bytes < root->sectorsize); + WARN_ON(num_bytes < info->sectorsize); ret = find_free_extent(trans, root, num_bytes, empty_size, search_start, search_end, hint_byte, ins, trans->alloc_exclude_start, - trans->alloc_exclude_nr, data); - BUG_ON(ret); -found: - clear_extent_dirty(&root->fs_info->free_space_cache, - ins->objectid, ins->objectid + ins->offset - 1, - GFP_NOFS); + trans->alloc_exclude_nr, profile); + if (ret < 0) + return ret; + clear_extent_dirty(&info->free_space_cache, + ins->objectid, ins->objectid + ins->offset - 1); return ret; } @@ -2701,17 +2710,15 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct extent_buffer *leaf; u32 size = sizeof(*extent_item) + sizeof(*iref); - int skinny_metadata = - btrfs_fs_incompat(fs_info, - BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA); + int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA); if (!skinny_metadata) size += sizeof(*block_info); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; - path->leave_spinning = 1; ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, ins, size); BUG_ON(ret); @@ -2739,7 +2746,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); btrfs_free_path(path); - ret = update_block_group(trans, root, ins->objectid, root->leafsize, + ret = update_block_group(root, ins->objectid, fs_info->nodesize, 1, 0); return ret; } @@ -2771,12 +2778,11 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans, set_extent_bits(&root->fs_info->extent_ins, ins->objectid, ins->objectid + ins->offset - 1, - EXTENT_LOCKED, GFP_NOFS); + EXTENT_LOCKED); set_state_private(&root->fs_info->extent_ins, ins->objectid, (unsigned long)extent_op); } else { - if (btrfs_fs_incompat(root->fs_info, - BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)) { + if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) { ins->offset = level; ins->type = BTRFS_METADATA_ITEM_KEY; } @@ -2811,7 +2817,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return ERR_PTR(ret); } - buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize); + buf = btrfs_find_create_tree_block(root->fs_info, ins.objectid); if (!buf) { btrfs_free_extent(trans, root, ins.objectid, ins.offset, 0, root->root_key.objectid, level, 0); @@ -2890,8 +2896,8 @@ static void noinline reada_walk_down(struct btrfs_root *root, for (i = slot; i < nritems && skipped < 32; i++) { bytenr = btrfs_node_blockptr(node, i); - if (last && ((bytenr > last && bytenr - last > 32 * 1024) || - (last > bytenr && last - bytenr > 32 * 1024))) { + if (last && ((bytenr > last && bytenr - last > SZ_32K) || + (last > bytenr && last - bytenr > SZ_32K))) { skipped++; continue; } @@ -2988,6 +2994,13 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, next = read_tree_block(root, bytenr, blocksize, ptr_gen); mutex_lock(&root->fs_info->fs_mutex); + if (!extent_buffer_uptodate(next)) { + if (IS_ERR(next)) + ret = PTR_ERR(next); + else + ret = -EIO; + break; + } } WARN_ON(*level <= 0); if (path->nodes[*level-1]) @@ -3090,8 +3103,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) break; ret = get_state_private(&info->block_group_cache, start, &ptr); if (!ret) { - cache = (struct btrfs_block_group_cache *) - (uintptr_t)ptr; + cache = u64_to_ptr(ptr); if (cache->free_space_ctl) { btrfs_remove_free_space_cache(cache); kfree(cache->free_space_ctl); @@ -3099,15 +3111,14 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) kfree(cache); } clear_extent_bits(&info->block_group_cache, start, - end, (unsigned int)-1, GFP_NOFS); + end, (unsigned int)-1); } while(1) { ret = find_first_extent_bit(&info->free_space_cache, 0, &start, &end, EXTENT_DIRTY); if (ret) break; - clear_extent_dirty(&info->free_space_cache, start, - end, GFP_NOFS); + clear_extent_dirty(&info->free_space_cache, start, end); } while (!list_empty(&info->space_info)) { @@ -3153,6 +3164,54 @@ error: return ret; } +static void account_super_bytes(struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *cache) +{ + u64 bytenr; + u64 *logical; + int stripe_len; + int i, nr, ret; + + if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) { + stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid; + cache->bytes_super += stripe_len; + } + + for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { + bytenr = btrfs_sb_offset(i); + ret = btrfs_rmap_block(fs_info, + cache->key.objectid, bytenr, + 0, &logical, &nr, &stripe_len); + if (ret) + return; + + while (nr--) { + u64 start, len; + + if (logical[nr] > cache->key.objectid + + cache->key.offset) + continue; + + if (logical[nr] + stripe_len <= cache->key.objectid) + continue; + + start = logical[nr]; + if (start < cache->key.objectid) { + start = cache->key.objectid; + len = (logical[nr] + stripe_len) - start; + } else { + len = min_t(u64, stripe_len, + cache->key.objectid + + cache->key.offset - start); + } + + cache->bytes_super += len; + } + + kfree(logical); + } +} + int btrfs_read_block_groups(struct btrfs_root *root) { struct btrfs_path *path; @@ -3171,7 +3230,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) root = info->extent_root; key.objectid = 0; key.offset = 0; - btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); + key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -3187,10 +3246,11 @@ int btrfs_read_block_groups(struct btrfs_root *root) } leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + cache = kzalloc(sizeof(*cache), GFP_NOFS); if (!cache) { ret = -ENOMEM; - break; + goto error; } read_extent_buffer(leaf, &cache->item, @@ -3200,7 +3260,20 @@ int btrfs_read_block_groups(struct btrfs_root *root) cache->cached = 0; cache->pinned = 0; key.objectid = found_key.objectid + found_key.offset; + if (found_key.offset == 0) + key.objectid++; btrfs_release_path(path); + + /* + * Skip 0 sized block group, don't insert them into block + * group cache tree, as its length is 0, it won't get + * freed at close_ctree() time. + */ + if (found_key.offset == 0) { + free(cache); + continue; + } + cache->flags = btrfs_block_group_flags(&cache->item); bit = 0; if (cache->flags & BTRFS_BLOCK_GROUP_DATA) { @@ -3211,9 +3284,11 @@ int btrfs_read_block_groups(struct btrfs_root *root) bit = BLOCK_GROUP_METADATA; } set_avail_alloc_bits(info, cache->flags); - if (btrfs_chunk_readonly(root, cache->key.objectid)) + if (btrfs_chunk_readonly(info, cache->key.objectid)) cache->ro = 1; + account_super_bytes(info, cache); + ret = update_space_info(info, cache->flags, found_key.offset, btrfs_block_group_used(&cache->item), &space_info); @@ -3223,7 +3298,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) /* use EXTENT_LOCKED to prevent merging */ set_extent_bits(block_group_cache, found_key.objectid, found_key.objectid + found_key.offset - 1, - bit | EXTENT_LOCKED, GFP_NOFS); + bit | EXTENT_LOCKED); set_state_private(block_group_cache, found_key.objectid, (unsigned long)cache); } @@ -3235,7 +3310,7 @@ error: struct btrfs_block_group_cache * btrfs_add_block_group(struct btrfs_fs_info *fs_info, u64 bytes_used, u64 type, - u64 chunk_objectid, u64 chunk_offset, u64 size) + u64 chunk_offset, u64 size) { int ret; int bit = 0; @@ -3249,12 +3324,14 @@ btrfs_add_block_group(struct btrfs_fs_info *fs_info, u64 bytes_used, u64 type, cache->key.objectid = chunk_offset; cache->key.offset = size; - btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); + cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; btrfs_set_block_group_used(&cache->item, bytes_used); - btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); + btrfs_set_block_group_chunk_objectid(&cache->item, + BTRFS_FIRST_CHUNK_TREE_OBJECTID); cache->flags = type; btrfs_set_block_group_flags(&cache->item, type); + account_super_bytes(fs_info, cache); ret = update_space_info(fs_info, cache->flags, size, bytes_used, &cache->space_info); BUG_ON(ret); @@ -3262,7 +3339,7 @@ btrfs_add_block_group(struct btrfs_fs_info *fs_info, u64 bytes_used, u64 type, bit = block_group_state_bits(type); ret = set_extent_bits(block_group_cache, chunk_offset, chunk_offset + size - 1, - bit | EXTENT_LOCKED, GFP_NOFS); + bit | EXTENT_LOCKED); BUG_ON(ret); ret = set_state_private(block_group_cache, chunk_offset, @@ -3274,17 +3351,15 @@ btrfs_add_block_group(struct btrfs_fs_info *fs_info, u64 bytes_used, u64 type, } int btrfs_make_block_group(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytes_used, - u64 type, u64 chunk_objectid, u64 chunk_offset, - u64 size) + struct btrfs_fs_info *fs_info, u64 bytes_used, + u64 type, u64 chunk_offset, u64 size) { int ret; - struct btrfs_root *extent_root; + struct btrfs_root *extent_root = fs_info->extent_root; struct btrfs_block_group_cache *cache; - cache = btrfs_add_block_group(root->fs_info, bytes_used, type, - chunk_objectid, chunk_offset, size); - extent_root = root->fs_info->extent_root; + cache = btrfs_add_block_group(fs_info, bytes_used, type, chunk_offset, + size); ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, sizeof(cache->item)); BUG_ON(ret); @@ -3305,7 +3380,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, * before doing any block allocation. */ int btrfs_make_block_groups(struct btrfs_trans_handle *trans, - struct btrfs_root *root) + struct btrfs_fs_info *fs_info) { u64 total_bytes; u64 cur_start; @@ -3317,15 +3392,14 @@ int btrfs_make_block_groups(struct btrfs_trans_handle *trans, u64 chunk_objectid; int ret; int bit; - struct btrfs_root *extent_root; + struct btrfs_root *extent_root = fs_info->extent_root; struct btrfs_block_group_cache *cache; struct extent_io_tree *block_group_cache; - extent_root = root->fs_info->extent_root; - block_group_cache = &root->fs_info->block_group_cache; + block_group_cache = &fs_info->block_group_cache; chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; - total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy); - group_align = 64 * root->sectorsize; + total_bytes = btrfs_super_total_bytes(fs_info->super_copy); + group_align = 64 * fs_info->sectorsize; cur_start = 0; while (cur_start < total_bytes) { @@ -3336,19 +3410,18 @@ int btrfs_make_block_groups(struct btrfs_trans_handle *trans, group_type = BTRFS_BLOCK_GROUP_SYSTEM; group_size /= 4; group_size &= ~(group_align - 1); - group_size = max_t(u64, group_size, 8 * 1024 * 1024); - group_size = min_t(u64, group_size, 32 * 1024 * 1024); + group_size = max_t(u64, group_size, SZ_8M); + group_size = min_t(u64, group_size, SZ_32M); } else { group_size &= ~(group_align - 1); if (total_data >= total_metadata * 2) { group_type = BTRFS_BLOCK_GROUP_METADATA; - group_size = min_t(u64, group_size, - 1ULL * 1024 * 1024 * 1024); + group_size = min_t(u64, group_size, SZ_1G); total_metadata += group_size; } else { group_type = BTRFS_BLOCK_GROUP_DATA; group_size = min_t(u64, group_size, - 5ULL * 1024 * 1024 * 1024); + 5ULL * SZ_1G); total_data += group_size; } if ((total_bytes - cur_start) * 4 < group_size * 5) @@ -3360,7 +3433,7 @@ int btrfs_make_block_groups(struct btrfs_trans_handle *trans, cache->key.objectid = cur_start; cache->key.offset = group_size; - btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); + cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; btrfs_set_block_group_used(&cache->item, 0); btrfs_set_block_group_chunk_objectid(&cache->item, @@ -3369,14 +3442,14 @@ int btrfs_make_block_groups(struct btrfs_trans_handle *trans, cache->flags = group_type; - ret = update_space_info(root->fs_info, group_type, group_size, + ret = update_space_info(fs_info, group_type, group_size, 0, &cache->space_info); BUG_ON(ret); - set_avail_alloc_bits(extent_root->fs_info, group_type); + set_avail_alloc_bits(fs_info, group_type); set_extent_bits(block_group_cache, cur_start, cur_start + group_size - 1, - bit | EXTENT_LOCKED, GFP_NOFS); + bit | EXTENT_LOCKED); set_state_private(block_group_cache, cur_start, (unsigned long)cache); cur_start += group_size; @@ -3384,7 +3457,7 @@ int btrfs_make_block_groups(struct btrfs_trans_handle *trans, /* then insert all the items */ cur_start = 0; while(cur_start < total_bytes) { - cache = btrfs_lookup_block_group(root->fs_info, cur_start); + cache = btrfs_lookup_block_group(fs_info, cur_start); BUG_ON(!cache); ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, @@ -3400,23 +3473,373 @@ int btrfs_make_block_groups(struct btrfs_trans_handle *trans, return 0; } -int btrfs_update_block_group(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +int btrfs_update_block_group(struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc, int mark_free) { - return update_block_group(trans, root, bytenr, num_bytes, + return update_block_group(root, bytenr, num_bytes, alloc, mark_free); } /* + * Just remove a block group item in extent tree + * Caller should ensure the block group is empty and all space is pinned. + * Or new tree block/data may be allocated into it. + */ +static int free_block_group_item(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 bytenr, u64 len) +{ + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_root *root = fs_info->extent_root; + int ret = 0; + + key.objectid = bytenr; + key.offset = len; + key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) { + ret = -ENOENT; + goto out; + } + if (ret < 0) + goto out; + + ret = btrfs_del_item(trans, root, path); +out: + btrfs_free_path(path); + return ret; +} + +static int free_dev_extent_item(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 devid, u64 dev_offset) +{ + struct btrfs_root *root = fs_info->dev_root; + struct btrfs_path *path; + struct btrfs_key key; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = devid; + key.type = BTRFS_DEV_EXTENT_KEY; + key.offset = dev_offset; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) + goto out; + if (ret > 0) { + ret = -ENOENT; + goto out; + } + + ret = btrfs_del_item(trans, root, path); +out: + btrfs_free_path(path); + return ret; +} + +static int free_chunk_dev_extent_items(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 chunk_offset) +{ + struct btrfs_chunk *chunk = NULL; + struct btrfs_root *root= fs_info->chunk_root; + struct btrfs_path *path; + struct btrfs_key key; + u16 num_stripes; + int i; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + key.type = BTRFS_CHUNK_ITEM_KEY; + key.offset = chunk_offset; + + ret = btrfs_search_slot(trans, root, &key, path, 0, 0); + if (ret < 0) + goto out; + if (ret > 0) { + ret = -ENOENT; + goto out; + } + chunk = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_chunk); + num_stripes = btrfs_chunk_num_stripes(path->nodes[0], chunk); + for (i = 0; i < num_stripes; i++) { + ret = free_dev_extent_item(trans, fs_info, + btrfs_stripe_devid_nr(path->nodes[0], chunk, i), + btrfs_stripe_offset_nr(path->nodes[0], chunk, i)); + if (ret < 0) + goto out; + } +out: + btrfs_free_path(path); + return ret; +} + +static int free_system_chunk_item(struct btrfs_super_block *super, + struct btrfs_key *key) +{ + struct btrfs_disk_key *disk_key; + struct btrfs_key cpu_key; + u32 array_size = btrfs_super_sys_array_size(super); + char *ptr = (char *)super->sys_chunk_array; + int cur = 0; + int ret = -ENOENT; + + while (cur < btrfs_super_sys_array_size(super)) { + struct btrfs_chunk *chunk; + u32 num_stripes; + u32 chunk_len; + + disk_key = (struct btrfs_disk_key *)(ptr + cur); + btrfs_disk_key_to_cpu(&cpu_key, disk_key); + if (cpu_key.type != BTRFS_CHUNK_ITEM_KEY) { + /* just in case */ + ret = -EIO; + goto out; + } + + chunk = (struct btrfs_chunk *)(ptr + cur + sizeof(*disk_key)); + num_stripes = btrfs_stack_chunk_num_stripes(chunk); + chunk_len = btrfs_chunk_item_size(num_stripes) + + sizeof(*disk_key); + + if (key->objectid == cpu_key.objectid && + key->offset == cpu_key.offset && + key->type == cpu_key.type) { + memmove(ptr + cur, ptr + cur + chunk_len, + array_size - cur - chunk_len); + array_size -= chunk_len; + btrfs_set_super_sys_array_size(super, array_size); + ret = 0; + goto out; + } + + cur += chunk_len; + } +out: + return ret; +} + +static int free_chunk_item(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 bytenr) +{ + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_root *root = fs_info->chunk_root; + struct btrfs_chunk *chunk; + u64 chunk_type; + int ret; + + key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + key.offset = bytenr; + key.type = BTRFS_CHUNK_ITEM_KEY; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) { + ret = -ENOENT; + goto out; + } + if (ret < 0) + goto out; + chunk = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_chunk); + chunk_type = btrfs_chunk_type(path->nodes[0], chunk); + + ret = btrfs_del_item(trans, root, path); + if (ret < 0) + goto out; + + if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) + ret = free_system_chunk_item(fs_info->super_copy, &key); +out: + btrfs_free_path(path); + return ret; +} + +static u64 get_dev_extent_len(struct map_lookup *map) +{ + int div; + + switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { + case 0: /* Single */ + case BTRFS_BLOCK_GROUP_DUP: + case BTRFS_BLOCK_GROUP_RAID1: + div = 1; + break; + case BTRFS_BLOCK_GROUP_RAID5: + div = (map->num_stripes - 1); + break; + case BTRFS_BLOCK_GROUP_RAID6: + div = (map->num_stripes - 2); + break; + case BTRFS_BLOCK_GROUP_RAID10: + div = (map->num_stripes / map->sub_stripes); + break; + default: + /* normally, read chunk security hook should handled it */ + BUG_ON(1); + } + return map->ce.size / div; +} + +/* free block group/chunk related caches */ +static int free_block_group_cache(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 bytenr, u64 len) +{ + struct btrfs_block_group_cache *cache; + struct cache_extent *ce; + struct map_lookup *map; + int ret; + int i; + u64 flags; + + /* Free block group cache first */ + cache = btrfs_lookup_block_group(fs_info, bytenr); + if (!cache) + return -ENOENT; + flags = cache->flags; + if (cache->free_space_ctl) { + btrfs_remove_free_space_cache(cache); + kfree(cache->free_space_ctl); + } + clear_extent_bits(&fs_info->block_group_cache, bytenr, bytenr + len - 1, + (unsigned int)-1); + ret = free_space_info(fs_info, flags, len, 0, NULL); + if (ret < 0) + goto out; + kfree(cache); + + /* Then free mapping info and dev usage info */ + ce = search_cache_extent(&fs_info->mapping_tree.cache_tree, bytenr); + if (!ce || ce->start != bytenr) { + ret = -ENOENT; + goto out; + } + map = container_of(ce, struct map_lookup, ce); + for (i = 0; i < map->num_stripes; i++) { + struct btrfs_device *device; + + device = map->stripes[i].dev; + device->bytes_used -= get_dev_extent_len(map); + ret = btrfs_update_device(trans, device); + if (ret < 0) + goto out; + } + remove_cache_extent(&fs_info->mapping_tree.cache_tree, ce); + free(map); +out: + return ret; +} + +int btrfs_free_block_group(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 bytenr, u64 len) +{ + struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_path *path; + struct btrfs_block_group_item *bgi; + struct btrfs_key key; + int ret = 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = bytenr; + key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; + key.offset = len; + + /* Double check the block group to ensure it's empty */ + ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); + if (ret > 0) { + ret = -ENONET; + goto out; + } + if (ret < 0) + goto out; + + bgi = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_block_group_item); + if (btrfs_disk_block_group_used(path->nodes[0], bgi)) { + fprintf(stderr, + "WARNING: block group [%llu,%llu) is not empty\n", + bytenr, bytenr + len); + ret = -EINVAL; + goto out; + } + btrfs_release_path(path); + + /* + * Now pin all space in the block group, to prevent further transaction + * allocate space from it. + * Every operation needs a transaction must be in the range. + */ + btrfs_pin_extent(fs_info, bytenr, len); + + /* delete block group item and chunk item */ + ret = free_block_group_item(trans, fs_info, bytenr, len); + if (ret < 0) { + fprintf(stderr, + "failed to free block group item for [%llu,%llu)\n", + bytenr, bytenr + len); + btrfs_unpin_extent(fs_info, bytenr, len); + goto out; + } + + ret = free_chunk_dev_extent_items(trans, fs_info, bytenr); + if (ret < 0) { + fprintf(stderr, + "failed to dev extents belongs to [%llu,%llu)\n", + bytenr, bytenr + len); + btrfs_unpin_extent(fs_info, bytenr, len); + goto out; + } + ret = free_chunk_item(trans, fs_info, bytenr); + if (ret < 0) { + fprintf(stderr, + "failed to free chunk for [%llu,%llu)\n", + bytenr, bytenr + len); + btrfs_unpin_extent(fs_info, bytenr, len); + goto out; + } + + /* Now release the block_group_cache */ + ret = free_block_group_cache(trans, fs_info, bytenr, len); + btrfs_unpin_extent(fs_info, bytenr, len); + +out: + btrfs_free_path(path); + return ret; +} + +/* * Fixup block accounting. The initial block accounting created by * make_block_groups isn't accuracy in this case. */ int btrfs_fix_block_accounting(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - int ret; + int ret = 0; int slot; u64 start = 0; u64 bytes_used = 0; @@ -3447,13 +3870,13 @@ int btrfs_fix_block_accounting(struct btrfs_trans_handle *trans, set_extent_bits(&root->fs_info->block_group_cache, cache->key.objectid, cache->key.objectid + cache->key.offset -1, - BLOCK_GROUP_DIRTY, GFP_NOFS); + BLOCK_GROUP_DIRTY); } btrfs_init_path(&path); key.offset = 0; key.objectid = 0; - btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + key.type = BTRFS_EXTENT_ITEM_KEY; ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, &path, 0, 0); if (ret < 0) @@ -3473,32 +3896,93 @@ int btrfs_fix_block_accounting(struct btrfs_trans_handle *trans, btrfs_item_key_to_cpu(leaf, &key, slot); if (key.type == BTRFS_EXTENT_ITEM_KEY) { bytes_used += key.offset; - ret = btrfs_update_block_group(trans, root, + ret = btrfs_update_block_group(root, key.objectid, key.offset, 1, 0); BUG_ON(ret); } else if (key.type == BTRFS_METADATA_ITEM_KEY) { - bytes_used += root->leafsize; - ret = btrfs_update_block_group(trans, root, - key.objectid, root->leafsize, 1, 0); - BUG_ON(ret); + bytes_used += fs_info->nodesize; + ret = btrfs_update_block_group(root, + key.objectid, fs_info->nodesize, 1, 0); + if (ret) + goto out; } path.slots[0]++; } btrfs_set_super_bytes_used(root->fs_info->super_copy, bytes_used); + ret = 0; +out: btrfs_release_path(&path); - return 0; + return ret; +} + +static void __get_extent_size(struct btrfs_root *root, struct btrfs_path *path, + u64 *start, u64 *len) +{ + struct btrfs_key key; + + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + BUG_ON(!(key.type == BTRFS_EXTENT_ITEM_KEY || + key.type == BTRFS_METADATA_ITEM_KEY)); + *start = key.objectid; + if (key.type == BTRFS_EXTENT_ITEM_KEY) + *len = key.offset; + else + *len = root->fs_info->nodesize; } /* - * Record a file extent. Do all the required works, such as inserting - * file extent item, inserting extent item and backref item into extent - * tree and updating block accounting. + * Find first overlap extent for range [bytenr, bytenr + len) + * Return 0 for found and point path to it. + * Return >0 for not found. + * Return <0 for err */ -int btrfs_record_file_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 objectid, - struct btrfs_inode_item *inode, - u64 file_pos, u64 disk_bytenr, - u64 num_bytes) +int btrfs_search_overlap_extent(struct btrfs_root *root, + struct btrfs_path *path, u64 bytenr, u64 len) +{ + struct btrfs_key key; + u64 cur_start; + u64 cur_len; + int ret; + + key.objectid = bytenr; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = (u64)-1; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + return ret; + BUG_ON(ret == 0); + + ret = btrfs_previous_extent_item(root, path, 0); + if (ret < 0) + return ret; + /* no previous, check next extent */ + if (ret > 0) + goto next; + __get_extent_size(root, path, &cur_start, &cur_len); + /* Tail overlap */ + if (cur_start + cur_len > bytenr) + return 1; + +next: + ret = btrfs_next_extent_item(root, path, bytenr + len); + if (ret < 0) + return ret; + /* No next, prev already checked, no overlap */ + if (ret > 0) + return 0; + __get_extent_size(root, path, &cur_start, &cur_len); + /* head overlap*/ + if (cur_start < bytenr + len) + return 1; + return 0; +} + +static int __btrfs_record_file_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 objectid, + struct btrfs_inode_item *inode, + u64 file_pos, u64 disk_bytenr, + u64 *ret_num_bytes) { int ret; struct btrfs_fs_info *info = root->fs_info; @@ -3506,36 +3990,102 @@ int btrfs_record_file_extent(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; struct btrfs_file_extent_item *fi; struct btrfs_key ins_key; - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_extent_item *ei; u64 nbytes; + u64 extent_num_bytes; + u64 extent_bytenr; + u64 extent_offset; + u64 num_bytes = *ret_num_bytes; + /* + * All supported file system should not use its 0 extent. + * As it's for hole + * + * And hole extent has no size limit, no need to loop. + */ if (disk_bytenr == 0) { ret = btrfs_insert_file_extent(trans, root, objectid, file_pos, disk_bytenr, num_bytes, num_bytes); return ret; } + num_bytes = min_t(u64, num_bytes, BTRFS_MAX_EXTENT_SIZE); - btrfs_init_path(&path); + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + /* First to check extent overlap */ + ret = btrfs_search_overlap_extent(extent_root, path, disk_bytenr, + num_bytes); + if (ret < 0) + goto fail; + if (ret > 0) { + /* Found overlap */ + u64 cur_start; + u64 cur_len; + + __get_extent_size(extent_root, path, &cur_start, &cur_len); + /* + * For convert case, this extent should be a subset of + * existing one. + */ + BUG_ON(disk_bytenr < cur_start); + + extent_bytenr = cur_start; + extent_num_bytes = cur_len; + extent_offset = disk_bytenr - extent_bytenr; + } else { + /* No overlap, create new extent */ + btrfs_release_path(path); + ins_key.objectid = disk_bytenr; + ins_key.offset = num_bytes; + ins_key.type = BTRFS_EXTENT_ITEM_KEY; + + ret = btrfs_insert_empty_item(trans, extent_root, path, + &ins_key, sizeof(*ei)); + if (ret == 0) { + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_item); + + btrfs_set_extent_refs(leaf, ei, 0); + btrfs_set_extent_generation(leaf, ei, 0); + btrfs_set_extent_flags(leaf, ei, + BTRFS_EXTENT_FLAG_DATA); + btrfs_mark_buffer_dirty(leaf); + ret = btrfs_update_block_group(root, disk_bytenr, + num_bytes, 1, 0); + if (ret) + goto fail; + } else if (ret != -EEXIST) { + goto fail; + } + btrfs_extent_post_op(trans, extent_root); + extent_bytenr = disk_bytenr; + extent_num_bytes = num_bytes; + extent_offset = 0; + } + btrfs_release_path(path); ins_key.objectid = objectid; ins_key.offset = file_pos; - btrfs_set_key_type(&ins_key, BTRFS_EXTENT_DATA_KEY); - ret = btrfs_insert_empty_item(trans, root, &path, &ins_key, + ins_key.type = BTRFS_EXTENT_DATA_KEY; + ret = btrfs_insert_empty_item(trans, root, path, &ins_key, sizeof(*fi)); if (ret) goto fail; - leaf = path.nodes[0]; - fi = btrfs_item_ptr(leaf, path.slots[0], + leaf = path->nodes[0]; + fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); btrfs_set_file_extent_generation(leaf, fi, trans->transid); btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG); - btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr); - btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes); - btrfs_set_file_extent_offset(leaf, fi, 0); + btrfs_set_file_extent_disk_bytenr(leaf, fi, extent_bytenr); + btrfs_set_file_extent_disk_num_bytes(leaf, fi, extent_num_bytes); + btrfs_set_file_extent_offset(leaf, fi, extent_offset); btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); - btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); + btrfs_set_file_extent_ram_bytes(leaf, fi, extent_num_bytes); btrfs_set_file_extent_compression(leaf, fi, 0); btrfs_set_file_extent_encryption(leaf, fi, 0); btrfs_set_file_extent_other_encoding(leaf, fi, 0); @@ -3543,42 +4093,164 @@ int btrfs_record_file_extent(struct btrfs_trans_handle *trans, nbytes = btrfs_stack_inode_nbytes(inode) + num_bytes; btrfs_set_stack_inode_nbytes(inode, nbytes); + btrfs_release_path(path); - btrfs_release_path(&path); + ret = btrfs_inc_extent_ref(trans, root, extent_bytenr, extent_num_bytes, + 0, root->root_key.objectid, objectid, + file_pos - extent_offset); + if (ret) + goto fail; + ret = 0; + *ret_num_bytes = min(extent_num_bytes - extent_offset, num_bytes); +fail: + btrfs_free_path(path); + return ret; +} + +/* + * Record a file extent. Do all the required works, such as inserting + * file extent item, inserting extent item and backref item into extent + * tree and updating block accounting. + */ +int btrfs_record_file_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 objectid, + struct btrfs_inode_item *inode, + u64 file_pos, u64 disk_bytenr, + u64 num_bytes) +{ + u64 cur_disk_bytenr = disk_bytenr; + u64 cur_file_pos = file_pos; + u64 cur_num_bytes = num_bytes; + int ret = 0; - ins_key.objectid = disk_bytenr; - ins_key.offset = num_bytes; - ins_key.type = BTRFS_EXTENT_ITEM_KEY; + while (num_bytes > 0) { + ret = __btrfs_record_file_extent(trans, root, objectid, + inode, cur_file_pos, + cur_disk_bytenr, + &cur_num_bytes); + if (ret < 0) + break; + cur_disk_bytenr += cur_num_bytes; + cur_file_pos += cur_num_bytes; + num_bytes -= cur_num_bytes; + } + return ret; +} - ret = btrfs_insert_empty_item(trans, extent_root, &path, - &ins_key, sizeof(*ei)); - if (ret == 0) { - leaf = path.nodes[0]; - ei = btrfs_item_ptr(leaf, path.slots[0], - struct btrfs_extent_item); - btrfs_set_extent_refs(leaf, ei, 0); - btrfs_set_extent_generation(leaf, ei, 0); - btrfs_set_extent_flags(leaf, ei, BTRFS_EXTENT_FLAG_DATA); +static int add_excluded_extent(struct btrfs_root *root, + u64 start, u64 num_bytes) +{ + u64 end = start + num_bytes - 1; + set_extent_bits(&root->fs_info->pinned_extents, + start, end, EXTENT_UPTODATE); + return 0; +} - btrfs_mark_buffer_dirty(leaf); +void free_excluded_extents(struct btrfs_root *root, + struct btrfs_block_group_cache *cache) +{ + u64 start, end; - ret = btrfs_update_block_group(trans, root, disk_bytenr, - num_bytes, 1, 0); + start = cache->key.objectid; + end = start + cache->key.offset - 1; + + clear_extent_bits(&root->fs_info->pinned_extents, + start, end, EXTENT_UPTODATE); +} + +int exclude_super_stripes(struct btrfs_root *root, + struct btrfs_block_group_cache *cache) +{ + u64 bytenr; + u64 *logical; + int stripe_len; + int i, nr, ret; + + if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) { + stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid; + cache->bytes_super += stripe_len; + ret = add_excluded_extent(root, cache->key.objectid, + stripe_len); if (ret) - goto fail; - } else if (ret != -EEXIST) { - goto fail; + return ret; } - btrfs_extent_post_op(trans, extent_root); - ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, 0, - root->root_key.objectid, - objectid, file_pos); - if (ret) - goto fail; - ret = 0; -fail: - btrfs_release_path(&path); - return ret; + for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { + bytenr = btrfs_sb_offset(i); + ret = btrfs_rmap_block(root->fs_info, + cache->key.objectid, bytenr, + 0, &logical, &nr, &stripe_len); + if (ret) + return ret; + + while (nr--) { + u64 start, len; + + if (logical[nr] > cache->key.objectid + + cache->key.offset) + continue; + + if (logical[nr] + stripe_len <= cache->key.objectid) + continue; + + start = logical[nr]; + if (start < cache->key.objectid) { + start = cache->key.objectid; + len = (logical[nr] + stripe_len) - start; + } else { + len = min_t(u64, stripe_len, + cache->key.objectid + + cache->key.offset - start); + } + + cache->bytes_super += len; + ret = add_excluded_extent(root, start, len); + if (ret) { + kfree(logical); + return ret; + } + } + + kfree(logical); + } + return 0; +} + +u64 add_new_free_space(struct btrfs_block_group_cache *block_group, + struct btrfs_fs_info *info, u64 start, u64 end) +{ + u64 extent_start, extent_end, size, total_added = 0; + int ret; + + while (start < end) { + ret = find_first_extent_bit(&info->pinned_extents, start, + &extent_start, &extent_end, + EXTENT_DIRTY | EXTENT_UPTODATE); + if (ret) + break; + + if (extent_start <= start) { + start = extent_end + 1; + } else if (extent_start > start && extent_start < end) { + size = extent_start - start; + total_added += size; + ret = btrfs_add_free_space(block_group->free_space_ctl, + start, size); + BUG_ON(ret); /* -ENOMEM or logic error */ + start = extent_end + 1; + } else { + break; + } + } + + if (start < end) { + size = end - start; + total_added += size; + ret = btrfs_add_free_space(block_group->free_space_ctl, start, + size); + BUG_ON(ret); /* -ENOMEM or logic error */ + } + + return total_added; }