X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=extent-tree.c;h=b52c515974914d15e72de88b868977ffb441187c;hb=981441102199c5c9e3fca84ff3e723ddfa78de01;hp=f3c36e10ea24df4a0af137ba545cb834db170dcf;hpb=97864fa126c1e7675595882943d0397c45ef4b67;p=platform%2Fupstream%2Fbtrfs-progs.git diff --git a/extent-tree.c b/extent-tree.c index f3c36e1..b52c515 100644 --- a/extent-tree.c +++ b/extent-tree.c @@ -18,6 +18,8 @@ #include #include +#include +#include #include "kerncompat.h" #include "radix-tree.h" #include "ctree.h" @@ -26,21 +28,64 @@ #include "transaction.h" #include "crc32c.h" #include "volumes.h" +#include "free-space-cache.h" +#include "utils.h" -#define BLOCK_GROUP_DATA EXTENT_WRITEBACK -#define BLOCK_GROUP_METADATA EXTENT_UPTODATE -#define BLOCK_GROUP_SYSTEM EXTENT_NEW - -#define BLOCK_GROUP_DIRTY EXTENT_DIRTY +#define PENDING_EXTENT_INSERT 0 +#define PENDING_EXTENT_DELETE 1 +#define PENDING_BACKREF_UPDATE 2 +struct pending_extent_op { + int type; + u64 bytenr; + u64 num_bytes; + u64 flags; + struct btrfs_disk_key key; + int level; +}; + +static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 root_objectid, u64 generation, + u64 flags, struct btrfs_disk_key *key, + int level, struct btrfs_key *ins); +static int __free_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 owner_objectid, + u64 owner_offset, int refs_to_drop); static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); -int btrfs_make_block_group(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytes_used, - u64 type, u64 chunk_tree, u64 chunk_objectid, - u64 size); +static struct btrfs_block_group_cache * +btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache + *hint, u64 search_start, int data, int owner); + +static int remove_sb_from_cache(struct btrfs_root *root, + struct btrfs_block_group_cache *cache) +{ + u64 bytenr; + u64 *logical; + int stripe_len; + int i, nr, ret; + struct extent_io_tree *free_space_cache; + + free_space_cache = &root->fs_info->free_space_cache; + for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { + bytenr = btrfs_sb_offset(i); + ret = btrfs_rmap_block(&root->fs_info->mapping_tree, + cache->key.objectid, bytenr, 0, + &logical, &nr, &stripe_len); + BUG_ON(ret); + while (nr--) { + clear_extent_dirty(free_space_cache, logical[nr], + logical[nr] + stripe_len - 1, GFP_NOFS); + } + kfree(logical); + } + return 0; +} static int cache_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *block_group) @@ -51,10 +96,8 @@ static int cache_block_group(struct btrfs_root *root, struct extent_buffer *leaf; struct extent_io_tree *free_space_cache; int slot; - u64 last = 0; + u64 last; u64 hole_size; - u64 first_free; - int found = 0; if (!block_group) return 0; @@ -70,22 +113,15 @@ static int cache_block_group(struct btrfs_root *root, return -ENOMEM; path->reada = 2; - first_free = block_group->key.objectid; - key.objectid = block_group->key.objectid; + last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); + key.objectid = last; key.offset = 0; - btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + key.type = 0; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) - return ret; - ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY); - if (ret < 0) - return ret; - if (ret == 0) { - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - if (key.objectid + key.offset > first_free) - first_free = key.objectid + key.offset; - } + goto err; + while(1) { leaf = path->nodes[0]; slot = path->slots[0]; @@ -108,25 +144,23 @@ static int cache_block_group(struct btrfs_root *root, break; } - if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { - if (!found) { - last = first_free; - found = 1; - } + if (key.type == BTRFS_EXTENT_ITEM_KEY || + key.type == BTRFS_METADATA_ITEM_KEY) { if (key.objectid > last) { hole_size = key.objectid - last; set_extent_dirty(free_space_cache, last, last + hole_size - 1, GFP_NOFS); } - last = key.objectid + key.offset; + if (key.type == BTRFS_METADATA_ITEM_KEY) + last = key.objectid + root->nodesize; + else + last = key.objectid + key.offset; } next: path->slots[0]++; } - if (!found) - last = first_free; if (block_group->key.objectid + block_group->key.offset > last) { hole_size = block_group->key.objectid + @@ -134,12 +168,42 @@ next: set_extent_dirty(free_space_cache, last, last + hole_size - 1, GFP_NOFS); } + remove_sb_from_cache(root, block_group); block_group->cached = 1; err: btrfs_free_path(path); return 0; } +struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct + btrfs_fs_info *info, + u64 bytenr) +{ + struct extent_io_tree *block_group_cache; + struct btrfs_block_group_cache *block_group = NULL; + u64 ptr; + u64 start; + u64 end; + int ret; + + bytenr = max_t(u64, bytenr, + BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE); + block_group_cache = &info->block_group_cache; + ret = find_first_extent_bit(block_group_cache, + bytenr, &start, &end, + BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA | + BLOCK_GROUP_SYSTEM); + if (ret) { + return NULL; + } + ret = get_state_private(block_group_cache, start, &ptr); + if (ret) + return NULL; + + block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr; + return block_group; +} + struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 bytenr) @@ -181,48 +245,43 @@ static int noinline find_search_start(struct btrfs_root *root, { int ret; struct btrfs_block_group_cache *cache = *cache_ret; - u64 last; + u64 last = *start_ret; u64 start = 0; u64 end = 0; - u64 cache_miss = 0; u64 search_start = *start_ret; int wrapped = 0; - if (!cache) { + if (!cache) goto out; - } again: ret = cache_block_group(root, cache); if (ret) goto out; last = max(search_start, cache->key.objectid); - if (!block_group_bits(cache, data)) { + if (cache->ro || !block_group_bits(cache, data)) goto new_group; - } while(1) { ret = find_first_extent_bit(&root->fs_info->free_space_cache, last, &start, &end, EXTENT_DIRTY); if (ret) { - if (!cache_miss) - cache_miss = last; goto new_group; } start = max(last, start); last = end + 1; if (last - start < num) { - if (last == cache->key.objectid + cache->key.offset) - cache_miss = start; continue; } - if (start + num > cache->key.objectid + cache->key.offset) + if (start + num > cache->key.objectid + cache->key.offset) { goto new_group; + } *start_ret = start; return 0; } out: + *start_ret = last; cache = btrfs_lookup_block_group(root->fs_info, search_start); if (!cache) { printk("Unable to find block group for %llu\n", @@ -234,9 +293,8 @@ out: new_group: last = cache->key.objectid + cache->key.offset; wrapped: - cache = btrfs_lookup_block_group(root->fs_info, last); + cache = btrfs_lookup_first_block_group(root->fs_info, last); if (!cache) { -no_cache: if (!wrapped) { wrapped = 1; last = search_start; @@ -244,28 +302,10 @@ no_cache: } goto out; } - if (cache_miss && !cache->cached) { - cache_block_group(root, cache); - last = cache_miss; - cache = btrfs_lookup_block_group(root->fs_info, last); - } - cache = btrfs_find_block_group(root, cache, last, data, 0); - if (!cache) - goto no_cache; *cache_ret = cache; - cache_miss = 0; goto again; } -static u64 div_factor(u64 num, int factor) -{ - if (factor == 10) - return num; - num *= factor; - num /= 10; - return num; -} - static int block_group_state_bits(u64 flags) { int bits = 0; @@ -278,10 +318,9 @@ static int block_group_state_bits(u64 flags) return bits; } -struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, - struct btrfs_block_group_cache - *hint, u64 search_start, - int data, int owner) +static struct btrfs_block_group_cache * +btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache + *hint, u64 search_start, int data, int owner) { struct btrfs_block_group_cache *cache; struct extent_io_tree *block_group_cache; @@ -297,19 +336,19 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int bit; int ret; int full_search = 0; - int factor = 8; + int factor = 10; block_group_cache = &info->block_group_cache; if (!owner) - factor = 8; + factor = 10; bit = block_group_state_bits(data); if (search_start) { struct btrfs_block_group_cache *shint; shint = btrfs_lookup_block_group(info, search_start); - if (shint && block_group_bits(shint, data)) { + if (shint && !shint->ro && block_group_bits(shint, data)) { used = btrfs_block_group_used(&shint->item); if (used + shint->pinned < div_factor(shint->key.offset, factor)) { @@ -317,7 +356,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, } } } - if (hint && block_group_bits(hint, data)) { + if (hint && !hint->ro && block_group_bits(hint, data)) { used = btrfs_block_group_used(&hint->item); if (used + hint->pinned < div_factor(hint->key.offset, factor)) { @@ -348,7 +387,7 @@ again: last = cache->key.objectid + cache->key.offset; used = btrfs_block_group_used(&cache->item); - if (block_group_bits(cache, data)) { + if (!cache->ro && block_group_bits(cache, data)) { if (full_search) free_check = cache->key.offset; else @@ -371,108 +410,6 @@ found: return found_group; } -static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset) -{ - u32 high_crc = ~(u32)0; - u32 low_crc = ~(u32)0; - __le64 lenum; - - lenum = cpu_to_le64(root_objectid); - high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); - lenum = cpu_to_le64(ref_generation); - low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); - if (owner >= BTRFS_FIRST_FREE_OBJECTID) { - lenum = cpu_to_le64(owner); - low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); - lenum = cpu_to_le64(owner_offset); - low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); - } - return ((u64)high_crc << 32) | (u64)low_crc; -} - -static int match_extent_ref(struct extent_buffer *leaf, - struct btrfs_extent_ref *disk_ref, - struct btrfs_extent_ref *cpu_ref) -{ - int ret; - int len; - - if (cpu_ref->objectid) - len = sizeof(*cpu_ref); - else - len = 2 * sizeof(u64); - ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref, - len); - return ret == 0; -} - -static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, u64 bytenr, - u64 root_objectid, - u64 ref_generation, u64 owner, - u64 owner_offset, int del) -{ - u64 hash; - struct btrfs_key key; - struct btrfs_key found_key; - struct btrfs_extent_ref ref; - struct extent_buffer *leaf; - struct btrfs_extent_ref *disk_ref; - int ret; - int ret2; - - btrfs_set_stack_ref_root(&ref, root_objectid); - btrfs_set_stack_ref_generation(&ref, ref_generation); - btrfs_set_stack_ref_objectid(&ref, owner); - btrfs_set_stack_ref_offset(&ref, owner_offset); - - hash = hash_extent_ref(root_objectid, ref_generation, owner, - owner_offset); - key.offset = hash; - key.objectid = bytenr; - key.type = BTRFS_EXTENT_REF_KEY; - - while (1) { - ret = btrfs_search_slot(trans, root, &key, path, - del ? -1 : 0, del); - if (ret < 0) - goto out; - leaf = path->nodes[0]; - if (ret != 0) { - u32 nritems = btrfs_header_nritems(leaf); - if (path->slots[0] >= nritems) { - ret2 = btrfs_next_leaf(root, path); - if (ret2) - goto out; - leaf = path->nodes[0]; - } - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid != bytenr || - found_key.type != BTRFS_EXTENT_REF_KEY) - goto out; - key.offset = found_key.offset; - if (del) { - btrfs_release_path(root, path); - continue; - } - } - disk_ref = btrfs_item_ptr(path->nodes[0], - path->slots[0], - struct btrfs_extent_ref); - if (match_extent_ref(path->nodes[0], disk_ref, &ref)) { - ret = 0; - goto out; - } - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - key.offset = found_key.offset + 1; - btrfs_release_path(root, path); - } -out: - return ret; -} - /* * Back reference rules. Back refs have three main goals: * @@ -487,1355 +424,2455 @@ out: * maintenance. This is actually the same as #2, but with a slightly * different use case. * - * File extents can be referenced by: + * There are two kinds of back refs. The implicit back refs is optimized + * for pointers in non-shared tree blocks. For a given pointer in a block, + * back refs of this kind provide information about the block's owner tree + * and the pointer's key. These information allow us to find the block by + * b-tree searching. The full back refs is for pointers in tree blocks not + * referenced by their owner trees. The location of tree block is recorded + * in the back refs. Actually the full back refs is generic, and can be + * used in all cases the implicit back refs is used. The major shortcoming + * of the full back refs is its overhead. Every time a tree block gets + * COWed, we have to update back refs entry for all pointers in it. * - * - multiple snapshots, subvolumes, or different generations in one subvol - * - different files inside a single subvolume (in theory, not implemented yet) - * - different offsets inside a file (bookend extents in file.c) + * For a newly allocated tree block, we use implicit back refs for + * pointers in it. This means most tree related operations only involve + * implicit back refs. For a tree block created in old transaction, the + * only way to drop a reference to it is COW it. So we can detect the + * event that tree block loses its owner tree's reference and do the + * back refs conversion. * - * The extent ref structure has fields for: + * When a tree block is COW'd through a tree, there are four cases: * - * - Objectid of the subvolume root - * - Generation number of the tree holding the reference - * - objectid of the file holding the reference - * - offset in the file corresponding to the key holding the reference + * The reference count of the block is one and the tree is the block's + * owner tree. Nothing to do in this case. * - * When a file extent is allocated the fields are filled in: - * (root_key.objectid, trans->transid, inode objectid, offset in file) + * The reference count of the block is one and the tree is not the + * block's owner tree. In this case, full back refs is used for pointers + * in the block. Remove these full back refs, add implicit back refs for + * every pointers in the new block. * - * When a leaf is cow'd new references are added for every file extent found - * in the leaf. It looks the same as the create case, but trans->transid - * will be different when the block is cow'd. + * The reference count of the block is greater than one and the tree is + * the block's owner tree. In this case, implicit back refs is used for + * pointers in the block. Add full back refs for every pointers in the + * block, increase lower level extents' reference counts. The original + * implicit back refs are entailed to the new block. * - * (root_key.objectid, trans->transid, inode objectid, offset in file) + * The reference count of the block is greater than one and the tree is + * not the block's owner tree. Add implicit back refs for every pointer in + * the new block, increase lower level extents' reference count. * - * When a file extent is removed either during snapshot deletion or file - * truncation, the corresponding back reference is found - * by searching for: + * Back Reference Key composing: * - * (btrfs_header_owner(leaf), btrfs_header_generation(leaf), - * inode objectid, offset in file) + * The key objectid corresponds to the first byte in the extent, + * The key type is used to differentiate between types of back refs. + * There are different meanings of the key offset for different types + * of back refs. * - * Btree extents can be referenced by: + * File extents can be referenced by: * - * - Different subvolumes - * - Different generations of the same subvolume + * - multiple snapshots, subvolumes, or different generations in one subvol + * - different files inside a single subvolume + * - different offsets inside a file (bookend extents in file.c) + * + * The extent ref structure for the implicit back refs has fields for: + * + * - Objectid of the subvolume root + * - objectid of the file holding the reference + * - original offset in the file + * - how many bookend extents + * + * The key offset for the implicit back refs is hash of the first + * three fields. * - * Storing sufficient information for a full reverse mapping of a btree - * block would require storing the lowest key of the block in the backref, - * and it would require updating that lowest key either before write out or - * every time it changed. Instead, the objectid of the lowest key is stored - * along with the level of the tree block. This provides a hint - * about where in the btree the block can be found. Searches through the - * btree only need to look for a pointer to that block, so they stop one - * level higher than the level recorded in the backref. + * The extent ref structure for the full back refs has field for: * - * Some btrees do not do reference counting on their extents. These - * include the extent tree and the tree of tree roots. Backrefs for these - * trees always have a generation of zero. + * - number of pointers in the tree leaf * - * When a tree block is created, back references are inserted: + * The key offset for the implicit back refs is the first byte of + * the tree leaf * - * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid) + * When a file extent is allocated, The implicit back refs is used. + * the fields are filled in: * - * When a tree block is cow'd in a reference counted root, - * new back references are added for all the blocks it points to. - * These are of the form (trans->transid will have increased since creation): + * (root_key.objectid, inode objectid, offset in file, 1) * - * (root->root_key.objectid, trans->transid, level, lowest_key_objectid) + * When a file extent is removed file truncation, we find the + * corresponding implicit back refs and check the following fields: * - * Because the lowest_key_objectid and the level are just hints - * they are not used when backrefs are deleted. When a backref is deleted: + * (btrfs_header_owner(leaf), inode objectid, offset in file) * - * if backref was for a tree root: - * root_objectid = root->root_key.objectid - * else - * root_objectid = btrfs_header_owner(parent) + * Btree extents can be referenced by: * - * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0) + * - Different subvolumes * - * Back Reference Key hashing: + * Both the implicit back refs and the full back refs for tree blocks + * only consist of key. The key offset for the implicit back refs is + * objectid of block's owner tree. The key offset for the full back refs + * is the first byte of parent block. * - * Back references have four fields, each 64 bits long. Unfortunately, - * This is hashed into a single 64 bit number and placed into the key offset. - * The key objectid corresponds to the first byte in the extent, and the - * key type is set to BTRFS_EXTENT_REF_KEY + * When implicit back refs is used, information about the lowest key and + * level of the tree block are required. These information are stored in + * tree block info structure. */ -int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, u64 bytenr, - u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset) + +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 +static int convert_extent_item_v0(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 owner, u32 extra_size) { - u64 hash; + struct btrfs_extent_item *item; + struct btrfs_extent_item_v0 *ei0; + struct btrfs_extent_ref_v0 *ref0; + struct btrfs_tree_block_info *bi; + struct extent_buffer *leaf; struct btrfs_key key; - struct btrfs_extent_ref ref; - struct btrfs_extent_ref *disk_ref; + struct btrfs_key found_key; + u32 new_size = sizeof(*item); + u64 refs; int ret; - btrfs_set_stack_ref_root(&ref, root_objectid); - btrfs_set_stack_ref_generation(&ref, ref_generation); - btrfs_set_stack_ref_objectid(&ref, owner); - btrfs_set_stack_ref_offset(&ref, owner_offset); - - hash = hash_extent_ref(root_objectid, ref_generation, owner, - owner_offset); - key.offset = hash; - key.objectid = bytenr; - key.type = BTRFS_EXTENT_REF_KEY; - - ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref)); - while (ret == -EEXIST) { - disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_extent_ref); - if (match_extent_ref(path->nodes[0], disk_ref, &ref)) - goto out; - key.offset++; - btrfs_release_path(root, path); - ret = btrfs_insert_empty_item(trans, root, path, &key, - sizeof(ref)); + leaf = path->nodes[0]; + BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0)); + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + ei0 = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_item_v0); + refs = btrfs_extent_refs_v0(leaf, ei0); + + if (owner == (u64)-1) { + while (1) { + if (path->slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + return ret; + BUG_ON(ret > 0); + leaf = path->nodes[0]; + } + btrfs_item_key_to_cpu(leaf, &found_key, + path->slots[0]); + BUG_ON(key.objectid != found_key.objectid); + if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) { + path->slots[0]++; + continue; + } + ref0 = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_ref_v0); + owner = btrfs_ref_objectid_v0(leaf, ref0); + break; + } } - if (ret) - goto out; - disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_extent_ref); - write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref, - sizeof(ref)); - btrfs_mark_buffer_dirty(path->nodes[0]); -out: - btrfs_release_path(root, path); - return ret; -} - -int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 bytenr, u64 num_bytes, - u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset) -{ - struct btrfs_path *path; - int ret; - struct btrfs_key key; - struct extent_buffer *l; - struct btrfs_extent_item *item; - u32 refs; + btrfs_release_path(path); - WARN_ON(num_bytes < root->sectorsize); - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + if (owner < BTRFS_FIRST_FREE_OBJECTID) + new_size += sizeof(*bi); - key.objectid = bytenr; - btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - key.offset = num_bytes; - ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, - 0, 1); + new_size -= sizeof(*ei0); + ret = btrfs_search_slot(trans, root, &key, path, new_size, 1); if (ret < 0) return ret; - if (ret != 0) { - BUG(); - } - BUG_ON(ret != 0); - l = path->nodes[0]; - item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); - refs = btrfs_extent_refs(l, item); - btrfs_set_extent_refs(l, item, refs + 1); - btrfs_mark_buffer_dirty(path->nodes[0]); - - btrfs_release_path(root->fs_info->extent_root, path); + BUG_ON(ret); - ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root, - path, bytenr, root_objectid, - ref_generation, owner, owner_offset); + ret = btrfs_extend_item(trans, root, path, new_size); BUG_ON(ret); - finish_current_insert(trans, root->fs_info->extent_root); - del_pending_extents(trans, root->fs_info->extent_root); - btrfs_free_path(path); + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); + btrfs_set_extent_refs(leaf, item, refs); + /* FIXME: get real generation */ + btrfs_set_extent_generation(leaf, item, 0); + if (owner < BTRFS_FIRST_FREE_OBJECTID) { + btrfs_set_extent_flags(leaf, item, + BTRFS_EXTENT_FLAG_TREE_BLOCK | + BTRFS_BLOCK_FLAG_FULL_BACKREF); + bi = (struct btrfs_tree_block_info *)(item + 1); + /* FIXME: get first key of the block */ + memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi)); + btrfs_set_tree_block_level(leaf, bi, (int)owner); + } else { + btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA); + } + btrfs_mark_buffer_dirty(leaf); return 0; } +#endif -int btrfs_extent_post_op(struct btrfs_trans_handle *trans, - struct btrfs_root *root) +u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset) { - finish_current_insert(trans, root->fs_info->extent_root); - del_pending_extents(trans, root->fs_info->extent_root); - return 0; + u32 high_crc = ~(u32)0; + u32 low_crc = ~(u32)0; + __le64 lenum; + + lenum = cpu_to_le64(root_objectid); + high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum)); + lenum = cpu_to_le64(owner); + low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); + lenum = cpu_to_le64(offset); + low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); + + return ((u64)high_crc << 31) ^ (u64)low_crc; } -static int lookup_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytenr, - u64 num_bytes, u32 *refs) +static u64 hash_extent_data_ref_item(struct extent_buffer *leaf, + struct btrfs_extent_data_ref *ref) { - struct btrfs_path *path; - int ret; - struct btrfs_key key; - struct extent_buffer *l; - struct btrfs_extent_item *item; + return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref), + btrfs_extent_data_ref_objectid(leaf, ref), + btrfs_extent_data_ref_offset(leaf, ref)); +} - WARN_ON(num_bytes < root->sectorsize); - path = btrfs_alloc_path(); - key.objectid = bytenr; - key.offset = num_bytes; - btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, - 0, 0); - if (ret < 0) - goto out; - if (ret != 0) { - btrfs_print_leaf(root, path->nodes[0]); - printk("failed to find block number %llu\n", - (unsigned long long)bytenr); - BUG(); - } - l = path->nodes[0]; - item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); - *refs = btrfs_extent_refs(l, item); -out: - btrfs_free_path(path); - return 0; +static int match_extent_data_ref(struct extent_buffer *leaf, + struct btrfs_extent_data_ref *ref, + u64 root_objectid, u64 owner, u64 offset) +{ + if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid || + btrfs_extent_data_ref_objectid(leaf, ref) != owner || + btrfs_extent_data_ref_offset(leaf, ref) != offset) + return 0; + return 1; } -u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, - struct btrfs_path *count_path, - u64 first_extent) +static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 bytenr, u64 parent, + u64 root_objectid, + u64 owner, u64 offset) { - struct btrfs_root *extent_root = root->fs_info->extent_root; - struct btrfs_path *path; - u64 bytenr; - u64 found_objectid; - u64 root_objectid = root->root_key.objectid; - u32 total_count = 0; - u32 cur_count; - u32 refs; + struct btrfs_key key; + struct btrfs_extent_data_ref *ref; + struct extent_buffer *leaf; u32 nritems; int ret; - struct btrfs_key key; - struct btrfs_key found_key; - struct extent_buffer *l; - struct btrfs_extent_item *item; - struct btrfs_extent_ref *ref_item; - int level = -1; - - path = btrfs_alloc_path(); -again: - if (level == -1) - bytenr = first_extent; - else - bytenr = count_path->nodes[level]->start; + int recow; + int err = -ENOENT; - cur_count = 0; key.objectid = bytenr; - key.offset = 0; - - btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); - if (ret < 0) - goto out; - BUG_ON(ret == 0); - - l = path->nodes[0]; - btrfs_item_key_to_cpu(l, &found_key, path->slots[0]); + if (parent) { + key.type = BTRFS_SHARED_DATA_REF_KEY; + key.offset = parent; + } else { + key.type = BTRFS_EXTENT_DATA_REF_KEY; + key.offset = hash_extent_data_ref(root_objectid, + owner, offset); + } +again: + recow = 0; + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) { + err = ret; + goto fail; + } - if (found_key.objectid != bytenr || - found_key.type != BTRFS_EXTENT_ITEM_KEY) { - goto out; + if (parent) { + if (!ret) + return 0; +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + key.type = BTRFS_EXTENT_REF_V0_KEY; + btrfs_release_path(path); + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) { + err = ret; + goto fail; + } + if (!ret) + return 0; +#endif + goto fail; } - item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); - refs = btrfs_extent_refs(l, item); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); while (1) { - nritems = btrfs_header_nritems(l); if (path->slots[0] >= nritems) { - ret = btrfs_next_leaf(extent_root, path); - if (ret == 0) - continue; - break; - } - btrfs_item_key_to_cpu(l, &found_key, path->slots[0]); - if (found_key.objectid != bytenr) - break; - if (found_key.type != BTRFS_EXTENT_REF_KEY) { - path->slots[0]++; - continue; - } + ret = btrfs_next_leaf(root, path); + if (ret < 0) + err = ret; + if (ret) + goto fail; - cur_count++; - ref_item = btrfs_item_ptr(l, path->slots[0], - struct btrfs_extent_ref); - found_objectid = btrfs_ref_root(l, ref_item); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + recow = 1; + } - if (found_objectid != root_objectid) { - total_count = 2; - goto out; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.objectid != bytenr || + key.type != BTRFS_EXTENT_DATA_REF_KEY) + goto fail; + + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_data_ref); + + if (match_extent_data_ref(leaf, ref, root_objectid, + owner, offset)) { + if (recow) { + btrfs_release_path(path); + goto again; + } + err = 0; + break; } - total_count = 1; path->slots[0]++; } - if (cur_count == 0) { - total_count = 0; - goto out; - } - if (level >= 0 && root->node == count_path->nodes[level]) - goto out; - level++; - btrfs_release_path(root, path); - goto again; - -out: - btrfs_free_path(path); - return total_count; -} -int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 owner_objectid) -{ - u64 generation; - u64 key_objectid; - u64 level; - u32 nritems; - struct btrfs_disk_key disk_key; - - level = btrfs_header_level(root->node); - generation = trans->transid; - nritems = btrfs_header_nritems(root->node); - if (nritems > 0) { - if (level == 0) - btrfs_item_key(root->node, &disk_key, 0); - else - btrfs_node_key(root->node, &disk_key, 0); - key_objectid = btrfs_disk_key_objectid(&disk_key); - } else { - key_objectid = 0; - } - return btrfs_inc_extent_ref(trans, root, root->node->start, - root->node->len, owner_objectid, - generation, level, key_objectid); +fail: + return err; } -int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_buffer *buf) +static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 bytenr, u64 parent, + u64 root_objectid, u64 owner, + u64 offset, int refs_to_add) { - u64 bytenr; - u32 nritems; struct btrfs_key key; - struct btrfs_file_extent_item *fi; - int i; - int level; + struct extent_buffer *leaf; + u32 size; + u32 num_refs; int ret; - int faili; - if (!root->ref_cows) - return 0; + key.objectid = bytenr; + if (parent) { + key.type = BTRFS_SHARED_DATA_REF_KEY; + key.offset = parent; + size = sizeof(struct btrfs_shared_data_ref); + } else { + key.type = BTRFS_EXTENT_DATA_REF_KEY; + key.offset = hash_extent_data_ref(root_objectid, + owner, offset); + size = sizeof(struct btrfs_extent_data_ref); + } - level = btrfs_header_level(buf); - nritems = btrfs_header_nritems(buf); - for (i = 0; i < nritems; i++) { - if (level == 0) { - u64 disk_bytenr; - btrfs_item_key_to_cpu(buf, &key, i); - if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) - continue; - fi = btrfs_item_ptr(buf, i, - struct btrfs_file_extent_item); - if (btrfs_file_extent_type(buf, fi) == - BTRFS_FILE_EXTENT_INLINE) - continue; - disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); - if (disk_bytenr == 0) - continue; - ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, - btrfs_file_extent_disk_num_bytes(buf, fi), - root->root_key.objectid, trans->transid, - key.objectid, key.offset); - if (ret) { - faili = i; - goto fail; - } + ret = btrfs_insert_empty_item(trans, root, path, &key, size); + if (ret && ret != -EEXIST) + goto fail; + + leaf = path->nodes[0]; + if (parent) { + struct btrfs_shared_data_ref *ref; + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_shared_data_ref); + if (ret == 0) { + btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add); } else { - bytenr = btrfs_node_blockptr(buf, i); - btrfs_node_key_to_cpu(buf, &key, i); - ret = btrfs_inc_extent_ref(trans, root, bytenr, - btrfs_level_size(root, level - 1), - root->root_key.objectid, - trans->transid, - level - 1, key.objectid); - if (ret) { - faili = i; + num_refs = btrfs_shared_data_ref_count(leaf, ref); + num_refs += refs_to_add; + btrfs_set_shared_data_ref_count(leaf, ref, num_refs); + } + } else { + struct btrfs_extent_data_ref *ref; + while (ret == -EEXIST) { + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_data_ref); + if (match_extent_data_ref(leaf, ref, root_objectid, + owner, offset)) + break; + btrfs_release_path(path); + + key.offset++; + ret = btrfs_insert_empty_item(trans, root, path, &key, + size); + if (ret && ret != -EEXIST) goto fail; - } + + leaf = path->nodes[0]; } - } - return 0; -fail: - WARN_ON(1); -#if 0 - for (i =0; i < faili; i++) { - if (level == 0) { - u64 disk_bytenr; - btrfs_item_key_to_cpu(buf, &key, i); - if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) - continue; - fi = btrfs_item_ptr(buf, i, - struct btrfs_file_extent_item); - if (btrfs_file_extent_type(buf, fi) == - BTRFS_FILE_EXTENT_INLINE) - continue; - disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); - if (disk_bytenr == 0) - continue; - err = btrfs_free_extent(trans, root, disk_bytenr, - btrfs_file_extent_disk_num_bytes(buf, - fi), 0); - BUG_ON(err); + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_data_ref); + if (ret == 0) { + btrfs_set_extent_data_ref_root(leaf, ref, + root_objectid); + btrfs_set_extent_data_ref_objectid(leaf, ref, owner); + btrfs_set_extent_data_ref_offset(leaf, ref, offset); + btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add); } else { - bytenr = btrfs_node_blockptr(buf, i); - err = btrfs_free_extent(trans, root, bytenr, - btrfs_level_size(root, level - 1), 0); - BUG_ON(err); + num_refs = btrfs_extent_data_ref_count(leaf, ref); + num_refs += refs_to_add; + btrfs_set_extent_data_ref_count(leaf, ref, num_refs); } } -#endif + btrfs_mark_buffer_dirty(leaf); + ret = 0; +fail: + btrfs_release_path(path); return ret; } -static int write_one_cache_group(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - struct btrfs_block_group_cache *cache) +static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + int refs_to_drop) { - int ret; - int pending_ret; - struct btrfs_root *extent_root = root->fs_info->extent_root; - unsigned long bi; + struct btrfs_key key; + struct btrfs_extent_data_ref *ref1 = NULL; + struct btrfs_shared_data_ref *ref2 = NULL; struct extent_buffer *leaf; - - ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); - if (ret < 0) - goto fail; - BUG_ON(ret); + u32 num_refs = 0; + int ret = 0; leaf = path->nodes[0]; - bi = btrfs_item_ptr_offset(leaf, path->slots[0]); - write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); - btrfs_mark_buffer_dirty(leaf); - btrfs_release_path(extent_root, path); -fail: - finish_current_insert(trans, extent_root); - pending_ret = del_pending_extents(trans, extent_root); - if (ret) - return ret; - if (pending_ret) - return pending_ret; - return 0; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + + if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { + ref1 = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_data_ref); + num_refs = btrfs_extent_data_ref_count(leaf, ref1); + } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { + ref2 = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_shared_data_ref); + num_refs = btrfs_shared_data_ref_count(leaf, ref2); +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { + struct btrfs_extent_ref_v0 *ref0; + ref0 = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_ref_v0); + num_refs = btrfs_ref_count_v0(leaf, ref0); +#endif + } else { + BUG(); + } + + BUG_ON(num_refs < refs_to_drop); + num_refs -= refs_to_drop; + if (num_refs == 0) { + ret = btrfs_del_item(trans, root, path); + } else { + if (key.type == BTRFS_EXTENT_DATA_REF_KEY) + btrfs_set_extent_data_ref_count(leaf, ref1, num_refs); + else if (key.type == BTRFS_SHARED_DATA_REF_KEY) + btrfs_set_shared_data_ref_count(leaf, ref2, num_refs); +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + else { + struct btrfs_extent_ref_v0 *ref0; + ref0 = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_ref_v0); + btrfs_set_ref_count_v0(leaf, ref0, num_refs); + } +#endif + btrfs_mark_buffer_dirty(leaf); + } + return ret; } -int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, - struct btrfs_root *root) +static noinline u32 extent_data_ref_count(struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_extent_inline_ref *iref) { - struct extent_io_tree *block_group_cache; - struct btrfs_block_group_cache *cache; - int ret; - int err = 0; - int werr = 0; - struct btrfs_path *path; - u64 last = 0; - u64 start; - u64 end; - u64 ptr; - - block_group_cache = &root->fs_info->block_group_cache; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - while(1) { - ret = find_first_extent_bit(block_group_cache, last, - &start, &end, BLOCK_GROUP_DIRTY); - if (ret) - break; - - last = end + 1; - ret = get_state_private(block_group_cache, start, &ptr); - if (ret) - break; + struct btrfs_key key; + struct extent_buffer *leaf; + struct btrfs_extent_data_ref *ref1; + struct btrfs_shared_data_ref *ref2; + u32 num_refs = 0; - cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; - err = write_one_cache_group(trans, root, - path, cache); - /* - * if we fail to write the cache group, we want - * to keep it marked dirty in hopes that a later - * write will work - */ - if (err) { - werr = err; - continue; + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (iref) { + if (btrfs_extent_inline_ref_type(leaf, iref) == + BTRFS_EXTENT_DATA_REF_KEY) { + ref1 = (struct btrfs_extent_data_ref *)(&iref->offset); + num_refs = btrfs_extent_data_ref_count(leaf, ref1); + } else { + ref2 = (struct btrfs_shared_data_ref *)(iref + 1); + num_refs = btrfs_shared_data_ref_count(leaf, ref2); } - clear_extent_bits(block_group_cache, start, end, - BLOCK_GROUP_DIRTY, GFP_NOFS); + } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { + ref1 = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_data_ref); + num_refs = btrfs_extent_data_ref_count(leaf, ref1); + } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { + ref2 = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_shared_data_ref); + num_refs = btrfs_shared_data_ref_count(leaf, ref2); +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { + struct btrfs_extent_ref_v0 *ref0; + ref0 = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_ref_v0); + num_refs = btrfs_ref_count_v0(leaf, ref0); +#endif + } else { + BUG(); } - btrfs_free_path(path); - return werr; + return num_refs; } -static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, - u64 flags) +static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 bytenr, u64 parent, + u64 root_objectid) { - struct list_head *head = &info->space_info; - struct list_head *cur; - struct btrfs_space_info *found; - list_for_each(cur, head) { - found = list_entry(cur, struct btrfs_space_info, list); - if (found->flags == flags) - return found; + struct btrfs_key key; + int ret; + + key.objectid = bytenr; + if (parent) { + key.type = BTRFS_SHARED_BLOCK_REF_KEY; + key.offset = parent; + } else { + key.type = BTRFS_TREE_BLOCK_REF_KEY; + key.offset = root_objectid; } - return NULL; + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) + ret = -ENOENT; +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + if (ret == -ENOENT && parent) { + btrfs_release_path(path); + key.type = BTRFS_EXTENT_REF_V0_KEY; + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) + ret = -ENOENT; + } +#endif + return ret; } -static int update_space_info(struct btrfs_fs_info *info, u64 flags, - u64 total_bytes, u64 bytes_used, - struct btrfs_space_info **space_info) +static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 bytenr, u64 parent, + u64 root_objectid) { - struct btrfs_space_info *found; + struct btrfs_key key; + int ret; - found = __find_space_info(info, flags); - if (found) { - found->total_bytes += total_bytes; - found->bytes_used += bytes_used; - WARN_ON(found->total_bytes < found->bytes_used); - *space_info = found; - return 0; + key.objectid = bytenr; + if (parent) { + key.type = BTRFS_SHARED_BLOCK_REF_KEY; + key.offset = parent; + } else { + key.type = BTRFS_TREE_BLOCK_REF_KEY; + key.offset = root_objectid; } - found = kmalloc(sizeof(*found), GFP_NOFS); - if (!found) - return -ENOMEM; - list_add(&found->list, &info->space_info); - found->flags = flags; - found->total_bytes = total_bytes; - found->bytes_used = bytes_used; - found->bytes_pinned = 0; - found->full = 0; - *space_info = found; - return 0; -} + ret = btrfs_insert_empty_item(trans, root, path, &key, 0); + btrfs_release_path(path); + return ret; +} -static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) +static inline int extent_ref_type(u64 parent, u64 owner) { - u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | - BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_DUP); - if (extra_flags) { - if (flags & BTRFS_BLOCK_GROUP_DATA) - fs_info->avail_data_alloc_bits |= extra_flags; - if (flags & BTRFS_BLOCK_GROUP_METADATA) - fs_info->avail_metadata_alloc_bits |= extra_flags; - if (flags & BTRFS_BLOCK_GROUP_SYSTEM) - fs_info->avail_system_alloc_bits |= extra_flags; + int type; + if (owner < BTRFS_FIRST_FREE_OBJECTID) { + if (parent > 0) + type = BTRFS_SHARED_BLOCK_REF_KEY; + else + type = BTRFS_TREE_BLOCK_REF_KEY; + } else { + if (parent > 0) + type = BTRFS_SHARED_DATA_REF_KEY; + else + type = BTRFS_EXTENT_DATA_REF_KEY; } + return type; } -static int do_chunk_alloc(struct btrfs_trans_handle *trans, - struct btrfs_root *extent_root, u64 alloc_bytes, - u64 flags) +static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_extent_inline_ref **ref_ret, + u64 bytenr, u64 num_bytes, + u64 parent, u64 root_objectid, + u64 owner, u64 offset, int insert) { - struct btrfs_space_info *space_info; - u64 thresh; - u64 start; - u64 num_bytes; + struct btrfs_key key; + struct extent_buffer *leaf; + struct btrfs_extent_item *ei; + struct btrfs_extent_inline_ref *iref; + u64 flags; + u32 item_size; + unsigned long ptr; + unsigned long end; + int extra_size; + int type; + int want; int ret; + int err = 0; + int skinny_metadata = + btrfs_fs_incompat(root->fs_info, + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA); - space_info = __find_space_info(extent_root->fs_info, flags); - if (!space_info) { - ret = update_space_info(extent_root->fs_info, flags, - 0, 0, &space_info); - BUG_ON(ret); - } - BUG_ON(!space_info); - - if (space_info->full) - return 0; - - thresh = div_factor(space_info->total_bytes, 7); - if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) < - thresh) - return 0; + key.objectid = bytenr; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = num_bytes; - ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags); - if (ret == -ENOSPC) { -printk("space info full %llu\n", (unsigned long long)flags); - space_info->full = 1; - return 0; + want = extent_ref_type(parent, owner); + if (insert) + extra_size = btrfs_extent_inline_ref_size(want); + else + extra_size = -1; + + if (owner < BTRFS_FIRST_FREE_OBJECTID && skinny_metadata) { + skinny_metadata = 1; + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = owner; + } else if (skinny_metadata) { + skinny_metadata = 0; } - BUG_ON(ret); +again: + ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1); + if (ret < 0) { + err = ret; + goto out; + } + + /* + * We may be a newly converted file system which still has the old fat + * extent entries for metadata, so try and see if we have one of those. + */ + if (ret > 0 && skinny_metadata) { + skinny_metadata = 0; + if (path->slots[0]) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == bytenr && + key.type == BTRFS_EXTENT_ITEM_KEY && + key.offset == num_bytes) + ret = 0; + } + if (ret) { + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = num_bytes; + btrfs_release_path(path); + goto again; + } + } + + if (ret) { + printf("Failed to find [%llu, %u, %llu]\n", key.objectid, key.type, key.offset); + return -ENOENT; + } - ret = btrfs_make_block_group(trans, extent_root, 0, flags, - extent_root->fs_info->chunk_root->root_key.objectid, - start, num_bytes); BUG_ON(ret); - return 0; -} -static int update_block_group(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 bytenr, u64 num_bytes, int alloc, - int mark_free) -{ - struct btrfs_block_group_cache *cache; - struct btrfs_fs_info *info = root->fs_info; - u64 total = num_bytes; - u64 old_val; - u64 byte_in_group; - u64 start; - u64 end; + leaf = path->nodes[0]; + item_size = btrfs_item_size_nr(leaf, path->slots[0]); +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + if (item_size < sizeof(*ei)) { + if (!insert) { + err = -ENOENT; + goto out; + } + ret = convert_extent_item_v0(trans, root, path, owner, + extra_size); + if (ret < 0) { + err = ret; + goto out; + } + leaf = path->nodes[0]; + item_size = btrfs_item_size_nr(leaf, path->slots[0]); + } +#endif + if (item_size < sizeof(*ei)) { + printf("Size is %u, needs to be %u, slot %d\n", + (unsigned)item_size, + (unsigned)sizeof(*ei), path->slots[0]); + btrfs_print_leaf(root, leaf); + return -EINVAL; + } + BUG_ON(item_size < sizeof(*ei)); - while(total) { - cache = btrfs_lookup_block_group(info, bytenr); - if (!cache) { - return -1; + ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); + flags = btrfs_extent_flags(leaf, ei); + + ptr = (unsigned long)(ei + 1); + end = (unsigned long)ei + item_size; + + if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) { + ptr += sizeof(struct btrfs_tree_block_info); + BUG_ON(ptr > end); + } else if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) { + if (!(flags & BTRFS_EXTENT_FLAG_DATA)) { + return -EIO; } - byte_in_group = bytenr - cache->key.objectid; - WARN_ON(byte_in_group > cache->key.offset); - start = cache->key.objectid; - end = start + cache->key.offset - 1; - set_extent_bits(&info->block_group_cache, start, end, - BLOCK_GROUP_DIRTY, GFP_NOFS); + } - old_val = btrfs_block_group_used(&cache->item); - num_bytes = min(total, cache->key.offset - byte_in_group); - if (alloc) { - old_val += num_bytes; - cache->space_info->bytes_used += num_bytes; + err = -ENOENT; + while (1) { + if (ptr >= end) { + WARN_ON(ptr > end); + break; + } + iref = (struct btrfs_extent_inline_ref *)ptr; + type = btrfs_extent_inline_ref_type(leaf, iref); + if (want < type) + break; + if (want > type) { + ptr += btrfs_extent_inline_ref_size(type); + continue; + } + + if (type == BTRFS_EXTENT_DATA_REF_KEY) { + struct btrfs_extent_data_ref *dref; + dref = (struct btrfs_extent_data_ref *)(&iref->offset); + if (match_extent_data_ref(leaf, dref, root_objectid, + owner, offset)) { + err = 0; + break; + } + if (hash_extent_data_ref_item(leaf, dref) < + hash_extent_data_ref(root_objectid, owner, offset)) + break; } else { - old_val -= num_bytes; - cache->space_info->bytes_used -= num_bytes; - if (mark_free) { - set_extent_dirty(&info->free_space_cache, - bytenr, bytenr + num_bytes - 1, - GFP_NOFS); + u64 ref_offset; + ref_offset = btrfs_extent_inline_ref_offset(leaf, iref); + if (parent > 0) { + if (parent == ref_offset) { + err = 0; + break; + } + if (ref_offset < parent) + break; + } else { + if (root_objectid == ref_offset) { + err = 0; + break; + } + if (ref_offset < root_objectid) + break; } } - btrfs_set_block_group_used(&cache->item, old_val); - total -= num_bytes; - bytenr += num_bytes; + ptr += btrfs_extent_inline_ref_size(type); } - return 0; + if (err == -ENOENT && insert) { + if (item_size + extra_size >= + BTRFS_MAX_EXTENT_ITEM_SIZE(root)) { + err = -EAGAIN; + goto out; + } + /* + * To add new inline back ref, we have to make sure + * there is no corresponding back ref item. + * For simplicity, we just do not add new inline back + * ref if there is any back ref item. + */ + if (find_next_key(path, &key) == 0 && key.objectid == bytenr && + key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) { + err = -EAGAIN; + goto out; + } + } + *ref_ret = (struct btrfs_extent_inline_ref *)ptr; +out: + return err; } -static int update_pinned_extents(struct btrfs_root *root, - u64 bytenr, u64 num, int pin) +static int setup_inline_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_extent_inline_ref *iref, + u64 parent, u64 root_objectid, + u64 owner, u64 offset, int refs_to_add) { - u64 len; - struct btrfs_block_group_cache *cache; - struct btrfs_fs_info *fs_info = root->fs_info; + struct extent_buffer *leaf; + struct btrfs_extent_item *ei; + unsigned long ptr; + unsigned long end; + unsigned long item_offset; + u64 refs; + int size; + int type; + int ret; - if (pin) { - set_extent_dirty(&fs_info->pinned_extents, - bytenr, bytenr + num - 1, GFP_NOFS); + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); + item_offset = (unsigned long)iref - (unsigned long)ei; + + type = extent_ref_type(parent, owner); + size = btrfs_extent_inline_ref_size(type); + + ret = btrfs_extend_item(trans, root, path, size); + BUG_ON(ret); + + ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); + refs = btrfs_extent_refs(leaf, ei); + refs += refs_to_add; + btrfs_set_extent_refs(leaf, ei, refs); + + ptr = (unsigned long)ei + item_offset; + end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]); + if (ptr < end - size) + memmove_extent_buffer(leaf, ptr + size, ptr, + end - size - ptr); + + iref = (struct btrfs_extent_inline_ref *)ptr; + btrfs_set_extent_inline_ref_type(leaf, iref, type); + if (type == BTRFS_EXTENT_DATA_REF_KEY) { + struct btrfs_extent_data_ref *dref; + dref = (struct btrfs_extent_data_ref *)(&iref->offset); + btrfs_set_extent_data_ref_root(leaf, dref, root_objectid); + btrfs_set_extent_data_ref_objectid(leaf, dref, owner); + btrfs_set_extent_data_ref_offset(leaf, dref, offset); + btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add); + } else if (type == BTRFS_SHARED_DATA_REF_KEY) { + struct btrfs_shared_data_ref *sref; + sref = (struct btrfs_shared_data_ref *)(iref + 1); + btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add); + btrfs_set_extent_inline_ref_offset(leaf, iref, parent); + } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) { + btrfs_set_extent_inline_ref_offset(leaf, iref, parent); } else { - clear_extent_dirty(&fs_info->pinned_extents, - bytenr, bytenr + num - 1, GFP_NOFS); - } - while (num > 0) { - cache = btrfs_lookup_block_group(fs_info, bytenr); - WARN_ON(!cache); - len = min(num, cache->key.offset - - (bytenr - cache->key.objectid)); - if (pin) { - cache->pinned += len; - cache->space_info->bytes_pinned += len; - fs_info->total_pinned += len; - } else { - cache->pinned -= len; - cache->space_info->bytes_pinned -= len; - fs_info->total_pinned -= len; - } - bytenr += len; - num -= len; + btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); } + btrfs_mark_buffer_dirty(leaf); return 0; } -int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) +static int lookup_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_extent_inline_ref **ref_ret, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 owner, u64 offset) { - u64 last = 0; - u64 start; - u64 end; - struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; int ret; - while(1) { - ret = find_first_extent_bit(pinned_extents, last, - &start, &end, EXTENT_DIRTY); - if (ret) - break; - set_extent_dirty(copy, start, end, GFP_NOFS); - last = end + 1; + ret = lookup_inline_extent_backref(trans, root, path, ref_ret, + bytenr, num_bytes, parent, + root_objectid, owner, offset, 0); + if (ret != -ENOENT) + return ret; + + btrfs_release_path(path); + *ref_ret = NULL; + + if (owner < BTRFS_FIRST_FREE_OBJECTID) { + ret = lookup_tree_block_ref(trans, root, path, bytenr, parent, + root_objectid); + } else { + ret = lookup_extent_data_ref(trans, root, path, bytenr, parent, + root_objectid, owner, offset); } - return 0; + return ret; } -int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct extent_io_tree *unpin) +static int update_inline_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_extent_inline_ref *iref, + int refs_to_mod) { - u64 start; - u64 end; + struct extent_buffer *leaf; + struct btrfs_extent_item *ei; + struct btrfs_extent_data_ref *dref = NULL; + struct btrfs_shared_data_ref *sref = NULL; + unsigned long ptr; + unsigned long end; + u32 item_size; + int size; + int type; int ret; - struct extent_io_tree *free_space_cache; - free_space_cache = &root->fs_info->free_space_cache; + u64 refs; - while(1) { - ret = find_first_extent_bit(unpin, 0, &start, &end, - EXTENT_DIRTY); - if (ret) - break; - update_pinned_extents(root, start, end + 1 - start, 0); - clear_extent_dirty(unpin, start, end, GFP_NOFS); - set_extent_dirty(free_space_cache, start, end, GFP_NOFS); + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); + refs = btrfs_extent_refs(leaf, ei); + WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0); + refs += refs_to_mod; + btrfs_set_extent_refs(leaf, ei, refs); + + type = btrfs_extent_inline_ref_type(leaf, iref); + + if (type == BTRFS_EXTENT_DATA_REF_KEY) { + dref = (struct btrfs_extent_data_ref *)(&iref->offset); + refs = btrfs_extent_data_ref_count(leaf, dref); + } else if (type == BTRFS_SHARED_DATA_REF_KEY) { + sref = (struct btrfs_shared_data_ref *)(iref + 1); + refs = btrfs_shared_data_ref_count(leaf, sref); + } else { + refs = 1; + BUG_ON(refs_to_mod != -1); + } + + BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod); + refs += refs_to_mod; + + if (refs > 0) { + if (type == BTRFS_EXTENT_DATA_REF_KEY) + btrfs_set_extent_data_ref_count(leaf, dref, refs); + else + btrfs_set_shared_data_ref_count(leaf, sref, refs); + } else { + size = btrfs_extent_inline_ref_size(type); + item_size = btrfs_item_size_nr(leaf, path->slots[0]); + ptr = (unsigned long)iref; + end = (unsigned long)ei + item_size; + if (ptr + size < end) + memmove_extent_buffer(leaf, ptr, ptr + size, + end - ptr - size); + item_size -= size; + ret = btrfs_truncate_item(trans, root, path, item_size, 1); + BUG_ON(ret); } + btrfs_mark_buffer_dirty(leaf); return 0; } -static int finish_current_insert(struct btrfs_trans_handle *trans, - struct btrfs_root *extent_root) +static int insert_inline_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 owner, + u64 offset, int refs_to_add) { - u64 start; - u64 end; - struct btrfs_fs_info *info = extent_root->fs_info; - struct extent_buffer *eb; - struct btrfs_path *path; - struct btrfs_key ins; - struct btrfs_disk_key first; - struct btrfs_extent_item extent_item; + struct btrfs_extent_inline_ref *iref; int ret; - int level; - int err = 0; - btrfs_set_stack_extent_refs(&extent_item, 1); - btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); - path = btrfs_alloc_path(); + ret = lookup_inline_extent_backref(trans, root, path, &iref, + bytenr, num_bytes, parent, + root_objectid, owner, offset, 1); + if (ret == 0) { + BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); + ret = update_inline_extent_backref(trans, root, path, iref, + refs_to_add); + } else if (ret == -ENOENT) { + ret = setup_inline_extent_backref(trans, root, path, iref, + parent, root_objectid, + owner, offset, refs_to_add); + } + return ret; +} - while(1) { - ret = find_first_extent_bit(&info->extent_ins, 0, &start, - &end, EXTENT_LOCKED); - if (ret) - break; +static int insert_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 bytenr, u64 parent, u64 root_objectid, + u64 owner, u64 offset, int refs_to_add) +{ + int ret; - ins.objectid = start; - ins.offset = end + 1 - start; - err = btrfs_insert_item(trans, extent_root, &ins, - &extent_item, sizeof(extent_item)); - clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, - GFP_NOFS); - eb = read_tree_block(extent_root, ins.objectid, ins.offset); - level = btrfs_header_level(eb); - if (level == 0) { - btrfs_item_key(eb, &first, 0); - } else { - btrfs_node_key(eb, &first, 0); - } - err = btrfs_insert_extent_backref(trans, extent_root, path, - start, extent_root->root_key.objectid, - 0, level, - btrfs_disk_key_objectid(&first)); - BUG_ON(err); - free_extent_buffer(eb); + if (owner >= BTRFS_FIRST_FREE_OBJECTID) { + ret = insert_extent_data_ref(trans, root, path, bytenr, + parent, root_objectid, + owner, offset, refs_to_add); + } else { + BUG_ON(refs_to_add != 1); + ret = insert_tree_block_ref(trans, root, path, bytenr, + parent, root_objectid); } - btrfs_free_path(path); - return 0; + return ret; } -static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, - int pending) +static int remove_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_extent_inline_ref *iref, + int refs_to_drop, int is_data) { - int err = 0; - struct extent_buffer *buf; + int ret; - if (!pending) { - buf = btrfs_find_tree_block(root, bytenr, num_bytes); - if (buf) { - if (btrfs_buffer_uptodate(buf)) { - u64 transid = - root->fs_info->running_transaction->transid; - if (btrfs_header_generation(buf) == - transid && !btrfs_header_flag(buf, - BTRFS_HEADER_FLAG_WRITTEN)) { - free_extent_buffer(buf); - return 1; - } - } - free_extent_buffer(buf); - } - update_pinned_extents(root, bytenr, num_bytes, 1); + BUG_ON(!is_data && refs_to_drop != 1); + if (iref) { + ret = update_inline_extent_backref(trans, root, path, iref, + -refs_to_drop); + } else if (is_data) { + ret = remove_extent_data_ref(trans, root, path, refs_to_drop); } else { - set_extent_bits(&root->fs_info->pending_del, - bytenr, bytenr + num_bytes - 1, - EXTENT_LOCKED, GFP_NOFS); + ret = btrfs_del_item(trans, root, path); } - BUG_ON(err < 0); - return 0; + return ret; } -/* - * remove an extent from the root, returns 0 on success - */ -static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 bytenr, u64 num_bytes, - u64 root_objectid, u64 ref_generation, - u64 owner_objectid, u64 owner_offset, int pin, - int mark_free) +int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 owner, u64 offset) { struct btrfs_path *path; - struct btrfs_key key; - struct btrfs_fs_info *info = root->fs_info; - struct btrfs_extent_ops *ops = info->extent_ops; - struct btrfs_root *extent_root = info->extent_root; struct extent_buffer *leaf; + struct btrfs_extent_item *item; + u64 refs; int ret; - int extent_slot = 0; - int found_extent = 0; - int num_to_del = 1; - struct btrfs_extent_item *ei; - u32 refs; - - key.objectid = bytenr; - btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - key.offset = num_bytes; + int err = 0; path = btrfs_alloc_path(); if (!path) return -ENOMEM; - ret = lookup_extent_backref(trans, extent_root, path, - bytenr, root_objectid, - ref_generation, - owner_objectid, owner_offset, 1); - if (ret == 0) { - struct btrfs_key found_key; - extent_slot = path->slots[0]; - while(extent_slot > 0) { - extent_slot--; - btrfs_item_key_to_cpu(path->nodes[0], &found_key, - extent_slot); - if (found_key.objectid != bytenr) - break; - if (found_key.type == BTRFS_EXTENT_ITEM_KEY && - found_key.offset == num_bytes) { - found_extent = 1; - break; - } - if (path->slots[0] - extent_slot > 5) - break; - } - if (!found_extent) - ret = btrfs_del_item(trans, extent_root, path); - } else { - btrfs_print_leaf(extent_root, path->nodes[0]); - WARN_ON(1); - printk("Unable to find ref byte nr %llu root %llu " - " gen %llu owner %llu offset %llu\n", - (unsigned long long)bytenr, - (unsigned long long)root_objectid, - (unsigned long long)ref_generation, - (unsigned long long)owner_objectid, - (unsigned long long)owner_offset); - } - if (!found_extent) { - btrfs_release_path(extent_root, path); - ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); - if (ret < 0) - return ret; - BUG_ON(ret); - extent_slot = path->slots[0]; - } + path->reada = 1; + + ret = insert_inline_extent_backref(trans, root->fs_info->extent_root, + path, bytenr, num_bytes, parent, + root_objectid, owner, offset, 1); + if (ret == 0) + goto out; + if (ret != -EAGAIN) { + err = ret; + goto out; + } + leaf = path->nodes[0]; - ei = btrfs_item_ptr(leaf, extent_slot, - struct btrfs_extent_item); - refs = btrfs_extent_refs(leaf, ei); - BUG_ON(refs == 0); - refs -= 1; - btrfs_set_extent_refs(leaf, ei, refs); + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); + refs = btrfs_extent_refs(leaf, item); + btrfs_set_extent_refs(leaf, item, refs + 1); btrfs_mark_buffer_dirty(leaf); + btrfs_release_path(path); - if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) { - /* if the back ref and the extent are next to each other - * they get deleted below in one shot - */ - path->slots[0] = extent_slot; - num_to_del = 2; - } else if (found_extent) { - /* otherwise delete the extent back ref */ - ret = btrfs_del_item(trans, extent_root, path); - BUG_ON(ret); - /* if refs are 0, we need to setup the path for deletion */ - if (refs == 0) { - btrfs_release_path(extent_root, path); - ret = btrfs_search_slot(trans, extent_root, &key, path, - -1, 1); - if (ret < 0) - return ret; - BUG_ON(ret); - } + path->reada = 1; + + /* now insert the actual backref */ + ret = insert_extent_backref(trans, root->fs_info->extent_root, + path, bytenr, parent, root_objectid, + owner, offset, 1); + if (ret) + err = ret; +out: + btrfs_free_path(path); + finish_current_insert(trans, root->fs_info->extent_root); + del_pending_extents(trans, root->fs_info->extent_root); + BUG_ON(err); + return err; +} + +int btrfs_extent_post_op(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + finish_current_insert(trans, root->fs_info->extent_root); + del_pending_extents(trans, root->fs_info->extent_root); + return 0; +} + +int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 offset, int metadata, u64 *refs, u64 *flags) +{ + struct btrfs_path *path; + int ret; + struct btrfs_key key; + struct extent_buffer *l; + struct btrfs_extent_item *item; + u32 item_size; + u64 num_refs; + u64 extent_flags; + + if (metadata && + !btrfs_fs_incompat(root->fs_info, + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)) { + offset = root->nodesize; + metadata = 0; } - if (refs == 0) { - u64 super_used; - u64 root_used; + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + path->reada = 1; - if (pin) { - ret = pin_down_bytes(root, bytenr, num_bytes, 0); - if (ret > 0) - mark_free = 1; - BUG_ON(ret < 0); - } + key.objectid = bytenr; + key.offset = offset; + if (metadata) + key.type = BTRFS_METADATA_ITEM_KEY; + else + key.type = BTRFS_EXTENT_ITEM_KEY; - /* block accounting for super block */ - super_used = btrfs_super_bytes_used(&info->super_copy); - btrfs_set_super_bytes_used(&info->super_copy, - super_used - num_bytes); +again: + ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, + 0, 0); + if (ret < 0) + goto out; - /* block accounting for root item */ - root_used = btrfs_root_used(&root->root_item); - btrfs_set_root_used(&root->root_item, - root_used - num_bytes); - ret = btrfs_del_items(trans, extent_root, path, path->slots[0], - num_to_del); - if (ret) - return ret; + /* + * Deal with the fact that we may have mixed SKINNY and normal refs. If + * we didn't find what we wanted check and see if we have a normal ref + * right next to us, or re-search if we are on the edge of the leaf just + * to make sure. + */ + if (ret > 0 && metadata) { + if (path->slots[0]) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == bytenr && + key.type == BTRFS_EXTENT_ITEM_KEY && + key.offset == root->nodesize) + ret = 0; + } + + if (ret) { + btrfs_release_path(path); + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = root->nodesize; + metadata = 0; + goto again; + } + } - if (ops && ops->free_extent) - ops->free_extent(root, bytenr, num_bytes); + if (ret != 0) { + ret = -EIO; + goto out; + } - ret = update_block_group(trans, root, bytenr, num_bytes, 0, - mark_free); - BUG_ON(ret); + l = path->nodes[0]; + item_size = btrfs_item_size_nr(l, path->slots[0]); + if (item_size >= sizeof(*item)) { + item = btrfs_item_ptr(l, path->slots[0], + struct btrfs_extent_item); + num_refs = btrfs_extent_refs(l, item); + extent_flags = btrfs_extent_flags(l, item); + } else { +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + struct btrfs_extent_item_v0 *ei0; + BUG_ON(item_size != sizeof(*ei0)); + ei0 = btrfs_item_ptr(l, path->slots[0], + struct btrfs_extent_item_v0); + num_refs = btrfs_extent_refs_v0(l, ei0); + /* FIXME: this isn't correct for data */ + extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; +#else + BUG(); +#endif } + item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); + if (refs) + *refs = num_refs; + if (flags) + *flags = extent_flags; +out: btrfs_free_path(path); - finish_current_insert(trans, extent_root); return ret; } -/* - * find all the blocks marked as pending in the radix tree and remove - * them from the extent map - */ -static int del_pending_extents(struct btrfs_trans_handle *trans, struct - btrfs_root *extent_root) +int btrfs_set_block_flags(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, int level, u64 flags) { + struct btrfs_path *path; int ret; - int err = 0; - u64 start; - u64 end; - struct extent_io_tree *pending_del; - struct extent_io_tree *pinned_extents; + struct btrfs_key key; + struct extent_buffer *l; + struct btrfs_extent_item *item; + u32 item_size; + int skinny_metadata = + btrfs_fs_incompat(root->fs_info, + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA); - pending_del = &extent_root->fs_info->pending_del; - pinned_extents = &extent_root->fs_info->pinned_extents; + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + path->reada = 1; - while(1) { - ret = find_first_extent_bit(pending_del, 0, &start, &end, - EXTENT_LOCKED); - if (ret) - break; - update_pinned_extents(extent_root, start, end + 1 - start, 1); - clear_extent_bits(pending_del, start, end, EXTENT_LOCKED, - GFP_NOFS); - ret = __free_extent(trans, extent_root, - start, end + 1 - start, - extent_root->root_key.objectid, - 0, 0, 0, 0, 0); - if (ret) - err = ret; + key.objectid = bytenr; + if (skinny_metadata) { + key.offset = level; + key.type = BTRFS_METADATA_ITEM_KEY; + } else { + key.offset = root->nodesize; + key.type = BTRFS_EXTENT_ITEM_KEY; } - return err; -} -/* - * remove an extent from the root, returns 0 on success - */ -int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 bytenr, u64 num_bytes, - u64 root_objectid, u64 ref_generation, - u64 owner_objectid, u64 owner_offset, int pin) -{ - struct btrfs_root *extent_root = root->fs_info->extent_root; - int pending_ret; - int ret; +again: + ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, + 0, 0); + if (ret < 0) + goto out; - WARN_ON(num_bytes < root->sectorsize); - if (!root->ref_cows) - ref_generation = 0; + if (ret > 0 && skinny_metadata) { + skinny_metadata = 0; + if (path->slots[0]) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == bytenr && + key.offset == root->nodesize && + key.type == BTRFS_EXTENT_ITEM_KEY) + ret = 0; + } + if (ret) { + btrfs_release_path(path); + key.offset = root->nodesize; + key.type = BTRFS_EXTENT_ITEM_KEY; + goto again; + } + } - if (root == extent_root) { - pin_down_bytes(root, bytenr, num_bytes, 1); - return 0; + if (ret != 0) { + btrfs_print_leaf(root, path->nodes[0]); + printk("failed to find block number %Lu\n", + (unsigned long long)bytenr); + BUG(); } - ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid, - ref_generation, owner_objectid, owner_offset, - pin, pin == 0); - pending_ret = del_pending_extents(trans, root->fs_info->extent_root); - return ret ? ret : pending_ret; -} + l = path->nodes[0]; + item_size = btrfs_item_size_nr(l, path->slots[0]); +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + if (item_size < sizeof(*item)) { + ret = convert_extent_item_v0(trans, root->fs_info->extent_root, + path, (u64)-1, 0); + if (ret < 0) + goto out; -static u64 stripe_align(struct btrfs_root *root, u64 val) -{ - u64 mask = ((u64)root->stripesize - 1); - u64 ret = (val + mask) & ~mask; + l = path->nodes[0]; + item_size = btrfs_item_size_nr(l, path->slots[0]); + } +#endif + BUG_ON(item_size < sizeof(*item)); + item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); + flags |= btrfs_extent_flags(l, item); + btrfs_set_extent_flags(l, item, flags); +out: + btrfs_free_path(path); + finish_current_insert(trans, root->fs_info->extent_root); + del_pending_extents(trans, root->fs_info->extent_root); return ret; } -/* - * walks the btree of allocated extents and find a hole of a given size. - * The key ins is changed to record the hole: - * ins->objectid == block start - * ins->flags = BTRFS_EXTENT_ITEM_KEY - * ins->offset == number of blocks - * Any available blocks before search_start are skipped. - */ -static int noinline find_free_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *orig_root, - u64 num_bytes, u64 empty_size, - u64 search_start, u64 search_end, - u64 hint_byte, struct btrfs_key *ins, - u64 exclude_start, u64 exclude_nr, - int data) +static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, + int record_parent, int inc) { - int ret; - u64 orig_search_start = search_start; - struct btrfs_root * root = orig_root->fs_info->extent_root; - struct btrfs_fs_info *info = root->fs_info; - u64 total_needed = num_bytes; - struct btrfs_block_group_cache *block_group; - int full_scan = 0; - int wrapped = 0; + u64 bytenr; + u64 num_bytes; + u64 parent; + u64 ref_root; + u32 nritems; + struct btrfs_key key; + struct btrfs_file_extent_item *fi; + int i; + int level; + int ret = 0; + int (*process_func)(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64, u64, u64, u64, u64, u64); - WARN_ON(num_bytes < root->sectorsize); - btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); + ref_root = btrfs_header_owner(buf); + nritems = btrfs_header_nritems(buf); + level = btrfs_header_level(buf); - if (search_end == (u64)-1) - search_end = btrfs_super_total_bytes(&info->super_copy); + if (!root->ref_cows && level == 0) + return 0; - if (hint_byte) { - block_group = btrfs_lookup_block_group(info, hint_byte); - if (!block_group) - hint_byte = search_start; - block_group = btrfs_find_block_group(root, block_group, - hint_byte, data, 1); - } else { - block_group = btrfs_find_block_group(root, - trans->block_group, - search_start, data, 1); - } + if (inc) + process_func = btrfs_inc_extent_ref; + else + process_func = btrfs_free_extent; - total_needed += empty_size; + if (record_parent) + parent = buf->start; + else + parent = 0; -check_failed: - if (!block_group) { - block_group = btrfs_lookup_block_group(info, search_start); - if (!block_group) - block_group = btrfs_lookup_block_group(info, - orig_search_start); + for (i = 0; i < nritems; i++) { + cond_resched(); + if (level == 0) { + btrfs_item_key_to_cpu(buf, &key, i); + if (key.type != BTRFS_EXTENT_DATA_KEY) + continue; + fi = btrfs_item_ptr(buf, i, + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(buf, fi) == + BTRFS_FILE_EXTENT_INLINE) + continue; + bytenr = btrfs_file_extent_disk_bytenr(buf, fi); + if (bytenr == 0) + continue; + + num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi); + key.offset -= btrfs_file_extent_offset(buf, fi); + ret = process_func(trans, root, bytenr, num_bytes, + parent, ref_root, key.objectid, + key.offset); + if (ret) { + WARN_ON(1); + goto fail; + } + } else { + bytenr = btrfs_node_blockptr(buf, i); + num_bytes = root->nodesize; + ret = process_func(trans, root, bytenr, num_bytes, + parent, ref_root, level - 1, 0); + if (ret) { + WARN_ON(1); + goto fail; + } + } } - ret = find_search_start(root, &block_group, &search_start, - total_needed, data); - if (ret) - goto error; - - search_start = stripe_align(root, search_start); - ins->objectid = search_start; - ins->offset = num_bytes; - - if (ins->objectid + num_bytes >= search_end) - goto enospc; + return 0; +fail: + WARN_ON(1); + return ret; +} - if (ins->objectid + num_bytes > - block_group->key.objectid + block_group->key.offset) { - search_start = block_group->key.objectid + - block_group->key.offset; - goto new_group; - } +int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct extent_buffer *buf, int record_parent) +{ + return __btrfs_mod_ref(trans, root, buf, record_parent, 1); +} - if (test_range_bit(&info->extent_ins, ins->objectid, - ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) { - search_start = ins->objectid + num_bytes; - goto new_group; - } +int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct extent_buffer *buf, int record_parent) +{ + return __btrfs_mod_ref(trans, root, buf, record_parent, 0); +} - if (test_range_bit(&info->pinned_extents, ins->objectid, - ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) { - search_start = ins->objectid + num_bytes; - goto new_group; - } +static int write_one_cache_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_block_group_cache *cache) +{ + int ret; + int pending_ret; + struct btrfs_root *extent_root = root->fs_info->extent_root; + unsigned long bi; + struct extent_buffer *leaf; - if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start && - ins->objectid < exclude_start + exclude_nr)) { - search_start = exclude_start + exclude_nr; - goto new_group; - } + ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); + if (ret < 0) + goto fail; + BUG_ON(ret); - if (!(data & BTRFS_BLOCK_GROUP_DATA)) { - block_group = btrfs_lookup_block_group(info, ins->objectid); - if (block_group) - trans->block_group = block_group; - } - ins->offset = num_bytes; + leaf = path->nodes[0]; + bi = btrfs_item_ptr_offset(leaf, path->slots[0]); + write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); + btrfs_mark_buffer_dirty(leaf); + btrfs_release_path(path); +fail: + finish_current_insert(trans, extent_root); + pending_ret = del_pending_extents(trans, extent_root); + if (ret) + return ret; + if (pending_ret) + return pending_ret; return 0; -new_group: - if (search_start + num_bytes >= search_end) { -enospc: - search_start = orig_search_start; - if (full_scan) { - ret = -ENOSPC; - goto error; - } - if (wrapped) { - if (!full_scan) - total_needed -= empty_size; - full_scan = 1; - } else - wrapped = 1; - } - block_group = btrfs_lookup_block_group(info, search_start); - cond_resched(); - block_group = btrfs_find_block_group(root, block_group, - search_start, data, 0); - goto check_failed; - -error: - return ret; } -/* - * finds a free extent and does all the dirty work required for allocation - * returns the key for the extent through ins, and a tree buffer for - * the first block of the extent through buf. - * - * returns 0 if everything worked, non-zero otherwise. - */ -int btrfs_alloc_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 num_bytes, u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset, - u64 empty_size, u64 hint_byte, - u64 search_end, struct btrfs_key *ins, int data) + +int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, + struct btrfs_root *root) { + struct extent_io_tree *block_group_cache; + struct btrfs_block_group_cache *cache; int ret; - int pending_ret; - u64 super_used, root_used; - u64 search_start = 0; - u64 alloc_profile; - struct btrfs_fs_info *info = root->fs_info; - struct btrfs_extent_ops *ops = info->extent_ops; - u32 sizes[2]; - struct btrfs_root *extent_root = info->extent_root; struct btrfs_path *path; - struct btrfs_extent_item *extent_item; - struct btrfs_extent_ref *ref; - struct btrfs_key keys[2]; + u64 last = 0; + u64 start; + u64 end; + u64 ptr; - if (data) { - alloc_profile = info->avail_data_alloc_bits & - info->data_alloc_profile; - data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; - } else if (root == info->chunk_root || info->force_system_allocs) { - alloc_profile = info->avail_system_alloc_bits & - info->system_alloc_profile; - data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; - } else { - alloc_profile = info->avail_metadata_alloc_bits & - info->metadata_alloc_profile; - data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; - } + block_group_cache = &root->fs_info->block_group_cache; + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; - if (root->ref_cows) { - if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { - ret = do_chunk_alloc(trans, root->fs_info->extent_root, - num_bytes, - BTRFS_BLOCK_GROUP_METADATA); - BUG_ON(ret); + while(1) { + ret = find_first_extent_bit(block_group_cache, last, + &start, &end, BLOCK_GROUP_DIRTY); + if (ret) { + if (last == 0) + break; + last = 0; + continue; } - ret = do_chunk_alloc(trans, root->fs_info->extent_root, - num_bytes + 2 * 1024 * 1024, data); - BUG_ON(ret); - } - - WARN_ON(num_bytes < root->sectorsize); - if (ops && ops->alloc_extent) { - ret = ops->alloc_extent(root, num_bytes, hint_byte, ins); - } else { - ret = find_free_extent(trans, root, num_bytes, empty_size, - search_start, search_end, hint_byte, - ins, trans->alloc_exclude_start, - trans->alloc_exclude_nr, data); - } - BUG_ON(ret); - if (ret) - return ret; - - /* block accounting for super block */ - super_used = btrfs_super_bytes_used(&info->super_copy); - btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes); - /* block accounting for root item */ - root_used = btrfs_root_used(&root->root_item); - btrfs_set_root_used(&root->root_item, root_used + num_bytes); + last = end + 1; + ret = get_state_private(block_group_cache, start, &ptr); + BUG_ON(ret); - clear_extent_dirty(&root->fs_info->free_space_cache, - ins->objectid, ins->objectid + ins->offset - 1, - GFP_NOFS); + clear_extent_bits(block_group_cache, start, end, + BLOCK_GROUP_DIRTY, GFP_NOFS); - if (root == extent_root) { - set_extent_bits(&root->fs_info->extent_ins, ins->objectid, - ins->objectid + ins->offset - 1, - EXTENT_LOCKED, GFP_NOFS); - goto update_block; + cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; + ret = write_one_cache_group(trans, root, path, cache); } + btrfs_free_path(path); + return 0; +} - WARN_ON(trans->alloc_exclude_nr); - trans->alloc_exclude_start = ins->objectid; - trans->alloc_exclude_nr = ins->offset; - - memcpy(&keys[0], ins, sizeof(*ins)); - keys[1].offset = hash_extent_ref(root_objectid, ref_generation, - owner, owner_offset); - keys[1].objectid = ins->objectid; - keys[1].type = BTRFS_EXTENT_REF_KEY; - sizes[0] = sizeof(*extent_item); - sizes[1] = sizeof(*ref); - - path = btrfs_alloc_path(); - BUG_ON(!path); - - ret = btrfs_insert_empty_items(trans, extent_root, path, keys, - sizes, 2); +static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, + u64 flags) +{ + struct btrfs_space_info *found; - BUG_ON(ret); - extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_extent_item); - btrfs_set_extent_refs(path->nodes[0], extent_item, 1); - ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, - struct btrfs_extent_ref); + flags &= BTRFS_BLOCK_GROUP_TYPE_MASK; - btrfs_set_ref_root(path->nodes[0], ref, root_objectid); - btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); - btrfs_set_ref_objectid(path->nodes[0], ref, owner); - btrfs_set_ref_offset(path->nodes[0], ref, owner_offset); + list_for_each_entry(found, &info->space_info, list) { + if (found->flags & flags) + return found; + } + return NULL; - btrfs_mark_buffer_dirty(path->nodes[0]); +} - trans->alloc_exclude_start = 0; - trans->alloc_exclude_nr = 0; - btrfs_free_path(path); - finish_current_insert(trans, extent_root); - pending_ret = del_pending_extents(trans, extent_root); +static int free_space_info(struct btrfs_fs_info *fs_info, u64 flags, + u64 total_bytes, u64 bytes_used, + struct btrfs_space_info **space_info) +{ + struct btrfs_space_info *found; - if (ret) { - return ret; - } - if (pending_ret) { - return pending_ret; - } + /* only support free block group which is empty */ + if (bytes_used) + return -ENOTEMPTY; -update_block: - ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0); - if (ret) { - printk("update block group failed for %llu %llu\n", - (unsigned long long)ins->objectid, - (unsigned long long)ins->offset); - BUG(); + found = __find_space_info(fs_info, flags); + if (!found) + return -ENOENT; + if (found->total_bytes < total_bytes) { + fprintf(stderr, + "WARNING: bad space info to free %llu only have %llu\n", + total_bytes, found->total_bytes); + return -EINVAL; } + found->total_bytes -= total_bytes; + if (space_info) + *space_info = found; return 0; } -/* - * helper function to allocate a block for a given tree - * returns the tree buffer or NULL. - */ -struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u32 blocksize, - u64 root_objectid, u64 hint, - u64 empty_size) +static int update_space_info(struct btrfs_fs_info *info, u64 flags, + u64 total_bytes, u64 bytes_used, + struct btrfs_space_info **space_info) { - u64 ref_generation; - - if (root->ref_cows) - ref_generation = trans->transid; - else - ref_generation = 0; + struct btrfs_space_info *found; + found = __find_space_info(info, flags); + if (found) { + found->total_bytes += total_bytes; + found->bytes_used += bytes_used; + if (found->total_bytes < found->bytes_used) { + fprintf(stderr, "warning, bad space info total_bytes " + "%llu used %llu\n", + (unsigned long long)found->total_bytes, + (unsigned long long)found->bytes_used); + } + *space_info = found; + return 0; + } + found = kmalloc(sizeof(*found), GFP_NOFS); + if (!found) + return -ENOMEM; - return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid, - ref_generation, 0, 0, hint, empty_size); + list_add(&found->list, &info->space_info); + found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; + found->total_bytes = total_bytes; + found->bytes_used = bytes_used; + found->bytes_pinned = 0; + found->full = 0; + *space_info = found; + return 0; } -/* - * helper function to allocate a block for a given tree - * returns the tree buffer or NULL. - */ -struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u32 blocksize, - u64 root_objectid, - u64 ref_generation, - u64 first_objectid, - int level, - u64 hint, - u64 empty_size) -{ - struct btrfs_key ins; - int ret; - struct extent_buffer *buf; - ret = btrfs_alloc_extent(trans, root, blocksize, - root_objectid, ref_generation, - level, first_objectid, empty_size, hint, - (u64)-1, &ins, 0); - if (ret) { - BUG_ON(ret > 0); - return ERR_PTR(ret); - } - buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize); - if (!buf) { - btrfs_free_extent(trans, root, ins.objectid, blocksize, - root->root_key.objectid, ref_generation, - 0, 0, 0); - BUG_ON(1); - return ERR_PTR(-ENOMEM); +static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) +{ + u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | + BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10 | + BTRFS_BLOCK_GROUP_RAID5 | + BTRFS_BLOCK_GROUP_RAID6 | + BTRFS_BLOCK_GROUP_DUP); + if (extra_flags) { + if (flags & BTRFS_BLOCK_GROUP_DATA) + fs_info->avail_data_alloc_bits |= extra_flags; + if (flags & BTRFS_BLOCK_GROUP_METADATA) + fs_info->avail_metadata_alloc_bits |= extra_flags; + if (flags & BTRFS_BLOCK_GROUP_SYSTEM) + fs_info->avail_system_alloc_bits |= extra_flags; } - btrfs_set_buffer_uptodate(buf); - trans->blocks_used++; - return buf; } -static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct extent_buffer *leaf) +static int do_chunk_alloc(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, u64 alloc_bytes, + u64 flags) { - u64 leaf_owner; - u64 leaf_generation; - struct btrfs_key key; - struct btrfs_file_extent_item *fi; - int i; - int nritems; + struct btrfs_space_info *space_info; + u64 thresh; + u64 start; + u64 num_bytes; int ret; - BUG_ON(!btrfs_is_leaf(leaf)); - nritems = btrfs_header_nritems(leaf); - leaf_owner = btrfs_header_owner(leaf); - leaf_generation = btrfs_header_generation(leaf); + space_info = __find_space_info(extent_root->fs_info, flags); + if (!space_info) { + ret = update_space_info(extent_root->fs_info, flags, + 0, 0, &space_info); + BUG_ON(ret); + } + BUG_ON(!space_info); - for (i = 0; i < nritems; i++) { - u64 disk_bytenr; + if (space_info->full) + return 0; - btrfs_item_key_to_cpu(leaf, &key, i); + thresh = div_factor(space_info->total_bytes, 7); + if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) < + thresh) + return 0; + + /* + * Avoid allocating given chunk type + */ + if (extent_root->fs_info->avoid_meta_chunk_alloc && + (flags & BTRFS_BLOCK_GROUP_METADATA)) + return 0; + if (extent_root->fs_info->avoid_sys_chunk_alloc && + (flags & BTRFS_BLOCK_GROUP_SYSTEM)) + return 0; + + ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, + space_info->flags); + if (ret == -ENOSPC) { + space_info->full = 1; + return 0; + } + + BUG_ON(ret); + + ret = btrfs_make_block_group(trans, extent_root, 0, space_info->flags, + BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes); + BUG_ON(ret); + return 0; +} + +static int update_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, int alloc, + int mark_free) +{ + struct btrfs_block_group_cache *cache; + struct btrfs_fs_info *info = root->fs_info; + u64 total = num_bytes; + u64 old_val; + u64 byte_in_group; + u64 start; + u64 end; + + /* block accounting for super block */ + old_val = btrfs_super_bytes_used(info->super_copy); + if (alloc) + old_val += num_bytes; + else + old_val -= num_bytes; + btrfs_set_super_bytes_used(info->super_copy, old_val); + + /* block accounting for root item */ + old_val = btrfs_root_used(&root->root_item); + if (alloc) + old_val += num_bytes; + else + old_val -= num_bytes; + btrfs_set_root_used(&root->root_item, old_val); + + while(total) { + cache = btrfs_lookup_block_group(info, bytenr); + if (!cache) { + return -1; + } + byte_in_group = bytenr - cache->key.objectid; + WARN_ON(byte_in_group > cache->key.offset); + start = cache->key.objectid; + end = start + cache->key.offset - 1; + set_extent_bits(&info->block_group_cache, start, end, + BLOCK_GROUP_DIRTY, GFP_NOFS); + + old_val = btrfs_block_group_used(&cache->item); + num_bytes = min(total, cache->key.offset - byte_in_group); + + if (alloc) { + old_val += num_bytes; + cache->space_info->bytes_used += num_bytes; + } else { + old_val -= num_bytes; + cache->space_info->bytes_used -= num_bytes; + if (mark_free) { + set_extent_dirty(&info->free_space_cache, + bytenr, bytenr + num_bytes - 1, + GFP_NOFS); + } + } + btrfs_set_block_group_used(&cache->item, old_val); + total -= num_bytes; + bytenr += num_bytes; + } + return 0; +} + +static int update_pinned_extents(struct btrfs_root *root, + u64 bytenr, u64 num, int pin) +{ + u64 len; + struct btrfs_block_group_cache *cache; + struct btrfs_fs_info *fs_info = root->fs_info; + + if (pin) { + set_extent_dirty(&fs_info->pinned_extents, + bytenr, bytenr + num - 1, GFP_NOFS); + } else { + clear_extent_dirty(&fs_info->pinned_extents, + bytenr, bytenr + num - 1, GFP_NOFS); + } + while (num > 0) { + cache = btrfs_lookup_block_group(fs_info, bytenr); + if (!cache) { + len = min((u64)root->sectorsize, num); + goto next; + } + WARN_ON(!cache); + len = min(num, cache->key.offset - + (bytenr - cache->key.objectid)); + if (pin) { + cache->pinned += len; + cache->space_info->bytes_pinned += len; + fs_info->total_pinned += len; + } else { + cache->pinned -= len; + cache->space_info->bytes_pinned -= len; + fs_info->total_pinned -= len; + } +next: + bytenr += len; + num -= len; + } + return 0; +} + +int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_io_tree *unpin) +{ + u64 start; + u64 end; + int ret; + struct extent_io_tree *free_space_cache; + free_space_cache = &root->fs_info->free_space_cache; + + while(1) { + ret = find_first_extent_bit(unpin, 0, &start, &end, + EXTENT_DIRTY); + if (ret) + break; + update_pinned_extents(root, start, end + 1 - start, 0); + clear_extent_dirty(unpin, start, end, GFP_NOFS); + set_extent_dirty(free_space_cache, start, end, GFP_NOFS); + } + return 0; +} + +static int extent_root_pending_ops(struct btrfs_fs_info *info) +{ + u64 start; + u64 end; + int ret; + + ret = find_first_extent_bit(&info->extent_ins, 0, &start, + &end, EXTENT_LOCKED); + if (!ret) { + ret = find_first_extent_bit(&info->pending_del, 0, &start, &end, + EXTENT_LOCKED); + } + return ret == 0; + +} +static int finish_current_insert(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root) +{ + u64 start; + u64 end; + u64 priv; + struct btrfs_fs_info *info = extent_root->fs_info; + struct pending_extent_op *extent_op; + struct btrfs_key key; + int ret; + int skinny_metadata = + btrfs_fs_incompat(extent_root->fs_info, + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA); + + while(1) { + ret = find_first_extent_bit(&info->extent_ins, 0, &start, + &end, EXTENT_LOCKED); + if (ret) + break; + + ret = get_state_private(&info->extent_ins, start, &priv); + BUG_ON(ret); + extent_op = (struct pending_extent_op *)(unsigned long)priv; + + if (extent_op->type == PENDING_EXTENT_INSERT) { + key.objectid = start; + if (skinny_metadata) { + key.offset = extent_op->level; + key.type = BTRFS_METADATA_ITEM_KEY; + } else { + key.offset = extent_op->num_bytes; + key.type = BTRFS_EXTENT_ITEM_KEY; + } + ret = alloc_reserved_tree_block(trans, extent_root, + extent_root->root_key.objectid, + trans->transid, + extent_op->flags, + &extent_op->key, + extent_op->level, &key); + BUG_ON(ret); + } else { + BUG_ON(1); + } + + clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, + GFP_NOFS); + kfree(extent_op); + } + return 0; +} + +static int pin_down_bytes(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, int is_data) +{ + int err = 0; + struct extent_buffer *buf; + + if (is_data) + goto pinit; + + buf = btrfs_find_tree_block(root, bytenr, num_bytes); + if (!buf) + goto pinit; + + /* we can reuse a block if it hasn't been written + * and it is from this transaction. We can't + * reuse anything from the tree log root because + * it has tiny sub-transactions. + */ + if (btrfs_buffer_uptodate(buf, 0)) { + u64 header_owner = btrfs_header_owner(buf); + u64 header_transid = btrfs_header_generation(buf); + if (header_owner != BTRFS_TREE_LOG_OBJECTID && + header_transid == trans->transid && + !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { + clean_tree_block(NULL, root, buf); + free_extent_buffer(buf); + return 1; + } + } + free_extent_buffer(buf); +pinit: + update_pinned_extents(root, bytenr, num_bytes, 1); + + BUG_ON(err < 0); + return 0; +} + +void btrfs_pin_extent(struct btrfs_fs_info *fs_info, + u64 bytenr, u64 num_bytes) +{ + update_pinned_extents(fs_info->extent_root, bytenr, num_bytes, 1); +} + +void btrfs_unpin_extent(struct btrfs_fs_info *fs_info, + u64 bytenr, u64 num_bytes) +{ + update_pinned_extents(fs_info->extent_root, bytenr, num_bytes, 0); +} + +/* + * remove an extent from the root, returns 0 on success + */ +static int __free_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 owner_objectid, + u64 owner_offset, int refs_to_drop) +{ + + struct btrfs_key key; + struct btrfs_path *path; + struct btrfs_root *extent_root = root->fs_info->extent_root; + struct extent_buffer *leaf; + struct btrfs_extent_item *ei; + struct btrfs_extent_inline_ref *iref; + int ret; + int is_data; + int extent_slot = 0; + int found_extent = 0; + int num_to_del = 1; + u32 item_size; + u64 refs; + int skinny_metadata = + btrfs_fs_incompat(extent_root->fs_info, + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA); + + if (root->fs_info->free_extent_hook) { + root->fs_info->free_extent_hook(trans, root, bytenr, num_bytes, + parent, root_objectid, owner_objectid, + owner_offset, refs_to_drop); + + } + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + path->reada = 1; + + is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID; + if (is_data) + skinny_metadata = 0; + BUG_ON(!is_data && refs_to_drop != 1); + + ret = lookup_extent_backref(trans, extent_root, path, &iref, + bytenr, num_bytes, parent, + root_objectid, owner_objectid, + owner_offset); + if (ret == 0) { + extent_slot = path->slots[0]; + while (extent_slot >= 0) { + btrfs_item_key_to_cpu(path->nodes[0], &key, + extent_slot); + if (key.objectid != bytenr) + break; + if (key.type == BTRFS_EXTENT_ITEM_KEY && + key.offset == num_bytes) { + found_extent = 1; + break; + } + if (key.type == BTRFS_METADATA_ITEM_KEY && + key.offset == owner_objectid) { + found_extent = 1; + break; + } + if (path->slots[0] - extent_slot > 5) + break; + extent_slot--; + } +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + item_size = btrfs_item_size_nr(path->nodes[0], extent_slot); + if (found_extent && item_size < sizeof(*ei)) + found_extent = 0; +#endif + if (!found_extent) { + BUG_ON(iref); + ret = remove_extent_backref(trans, extent_root, path, + NULL, refs_to_drop, + is_data); + BUG_ON(ret); + btrfs_release_path(path); + + key.objectid = bytenr; + + if (skinny_metadata) { + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = owner_objectid; + } else { + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = num_bytes; + } + + ret = btrfs_search_slot(trans, extent_root, + &key, path, -1, 1); + if (ret > 0 && skinny_metadata && path->slots[0]) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], + &key, + path->slots[0]); + if (key.objectid == bytenr && + key.type == BTRFS_EXTENT_ITEM_KEY && + key.offset == num_bytes) + ret = 0; + } + + if (ret > 0 && skinny_metadata) { + skinny_metadata = 0; + btrfs_release_path(path); + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = num_bytes; + ret = btrfs_search_slot(trans, extent_root, + &key, path, -1, 1); + } + + if (ret) { + printk(KERN_ERR "umm, got %d back from search" + ", was looking for %llu\n", ret, + (unsigned long long)bytenr); + btrfs_print_leaf(extent_root, path->nodes[0]); + } + BUG_ON(ret); + extent_slot = path->slots[0]; + } + } else { + printk(KERN_ERR "btrfs unable to find ref byte nr %llu " + "parent %llu root %llu owner %llu offset %llu\n", + (unsigned long long)bytenr, + (unsigned long long)parent, + (unsigned long long)root_objectid, + (unsigned long long)owner_objectid, + (unsigned long long)owner_offset); + ret = -EIO; + goto fail; + } + + leaf = path->nodes[0]; + item_size = btrfs_item_size_nr(leaf, extent_slot); +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + if (item_size < sizeof(*ei)) { + BUG_ON(found_extent || extent_slot != path->slots[0]); + ret = convert_extent_item_v0(trans, extent_root, path, + owner_objectid, 0); + BUG_ON(ret < 0); + + btrfs_release_path(path); + + key.objectid = bytenr; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = num_bytes; + + ret = btrfs_search_slot(trans, extent_root, &key, path, + -1, 1); + if (ret) { + printk(KERN_ERR "umm, got %d back from search" + ", was looking for %llu\n", ret, + (unsigned long long)bytenr); + btrfs_print_leaf(extent_root, path->nodes[0]); + } + BUG_ON(ret); + extent_slot = path->slots[0]; + leaf = path->nodes[0]; + item_size = btrfs_item_size_nr(leaf, extent_slot); + } +#endif + BUG_ON(item_size < sizeof(*ei)); + ei = btrfs_item_ptr(leaf, extent_slot, + struct btrfs_extent_item); + if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID && + key.type == BTRFS_EXTENT_ITEM_KEY) { + struct btrfs_tree_block_info *bi; + BUG_ON(item_size < sizeof(*ei) + sizeof(*bi)); + bi = (struct btrfs_tree_block_info *)(ei + 1); + WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi)); + } + + refs = btrfs_extent_refs(leaf, ei); + BUG_ON(refs < refs_to_drop); + refs -= refs_to_drop; + + if (refs > 0) { + /* + * In the case of inline back ref, reference count will + * be updated by remove_extent_backref + */ + if (iref) { + BUG_ON(!found_extent); + } else { + btrfs_set_extent_refs(leaf, ei, refs); + btrfs_mark_buffer_dirty(leaf); + } + if (found_extent) { + ret = remove_extent_backref(trans, extent_root, path, + iref, refs_to_drop, + is_data); + BUG_ON(ret); + } + } else { + int mark_free = 0; + int pin = 1; + + if (found_extent) { + BUG_ON(is_data && refs_to_drop != + extent_data_ref_count(root, path, iref)); + if (iref) { + BUG_ON(path->slots[0] != extent_slot); + } else { + BUG_ON(path->slots[0] != extent_slot + 1); + path->slots[0] = extent_slot; + num_to_del = 2; + } + } + + if (pin) { + ret = pin_down_bytes(trans, root, bytenr, num_bytes, + is_data); + if (ret > 0) + mark_free = 1; + BUG_ON(ret < 0); + } + + ret = btrfs_del_items(trans, extent_root, path, path->slots[0], + num_to_del); + BUG_ON(ret); + btrfs_release_path(path); + + if (is_data) { + ret = btrfs_del_csums(trans, root, bytenr, num_bytes); + BUG_ON(ret); + } + + update_block_group(trans, root, bytenr, num_bytes, 0, mark_free); + } +fail: + btrfs_free_path(path); + finish_current_insert(trans, extent_root); + return ret; +} + +/* + * find all the blocks marked as pending in the radix tree and remove + * them from the extent map + */ +static int del_pending_extents(struct btrfs_trans_handle *trans, struct + btrfs_root *extent_root) +{ + int ret; + int err = 0; + u64 start; + u64 end; + u64 priv; + struct extent_io_tree *pending_del; + struct extent_io_tree *extent_ins; + struct pending_extent_op *extent_op; + + extent_ins = &extent_root->fs_info->extent_ins; + pending_del = &extent_root->fs_info->pending_del; + + while(1) { + ret = find_first_extent_bit(pending_del, 0, &start, &end, + EXTENT_LOCKED); + if (ret) + break; + + ret = get_state_private(pending_del, start, &priv); + BUG_ON(ret); + extent_op = (struct pending_extent_op *)(unsigned long)priv; + + clear_extent_bits(pending_del, start, end, EXTENT_LOCKED, + GFP_NOFS); + + if (!test_range_bit(extent_ins, start, end, + EXTENT_LOCKED, 0)) { + ret = __free_extent(trans, extent_root, + start, end + 1 - start, 0, + extent_root->root_key.objectid, + extent_op->level, 0, 1); + kfree(extent_op); + } else { + kfree(extent_op); + ret = get_state_private(extent_ins, start, &priv); + BUG_ON(ret); + extent_op = (struct pending_extent_op *) + (unsigned long)priv; + + clear_extent_bits(extent_ins, start, end, + EXTENT_LOCKED, GFP_NOFS); + + if (extent_op->type == PENDING_BACKREF_UPDATE) + BUG_ON(1); + + kfree(extent_op); + } + if (ret) + err = ret; + } + return err; +} + + +int btrfs_free_tree_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, + u64 parent, int last_ref) +{ + return btrfs_free_extent(trans, root, buf->start, buf->len, parent, + root->root_key.objectid, + btrfs_header_level(buf), 0); +} + +/* + * remove an extent from the root, returns 0 on success + */ + +int btrfs_free_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 owner, u64 offset) +{ + struct btrfs_root *extent_root = root->fs_info->extent_root; + int pending_ret; + int ret; + + WARN_ON(num_bytes < root->sectorsize); + if (root == extent_root) { + struct pending_extent_op *extent_op; + + extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); + BUG_ON(!extent_op); + + extent_op->type = PENDING_EXTENT_DELETE; + extent_op->bytenr = bytenr; + extent_op->num_bytes = num_bytes; + extent_op->level = (int)owner; + + set_extent_bits(&root->fs_info->pending_del, + bytenr, bytenr + num_bytes - 1, + EXTENT_LOCKED, GFP_NOFS); + set_state_private(&root->fs_info->pending_del, + bytenr, (unsigned long)extent_op); + return 0; + } + ret = __free_extent(trans, root, bytenr, num_bytes, parent, + root_objectid, owner, offset, 1); + pending_ret = del_pending_extents(trans, root->fs_info->extent_root); + return ret ? ret : pending_ret; +} + +static u64 stripe_align(struct btrfs_root *root, u64 val) +{ + u64 mask = ((u64)root->stripesize - 1); + u64 ret = (val + mask) & ~mask; + return ret; +} + +/* + * walks the btree of allocated extents and find a hole of a given size. + * The key ins is changed to record the hole: + * ins->objectid == block start + * ins->flags = BTRFS_EXTENT_ITEM_KEY + * ins->offset == number of blocks + * Any available blocks before search_start are skipped. + */ +static int noinline find_free_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *orig_root, + u64 num_bytes, u64 empty_size, + u64 search_start, u64 search_end, + u64 hint_byte, struct btrfs_key *ins, + u64 exclude_start, u64 exclude_nr, + int data) +{ + int ret; + u64 orig_search_start = search_start; + struct btrfs_root * root = orig_root->fs_info->extent_root; + struct btrfs_fs_info *info = root->fs_info; + u64 total_needed = num_bytes; + struct btrfs_block_group_cache *block_group; + int full_scan = 0; + int wrapped = 0; + + WARN_ON(num_bytes < root->sectorsize); + ins->type = BTRFS_EXTENT_ITEM_KEY; + + search_start = stripe_align(root, search_start); + + if (hint_byte) { + block_group = btrfs_lookup_first_block_group(info, hint_byte); + if (!block_group) + hint_byte = search_start; + block_group = btrfs_find_block_group(root, block_group, + hint_byte, data, 1); + } else { + block_group = btrfs_find_block_group(root, + trans->block_group, + search_start, data, 1); + } + + total_needed += empty_size; + +check_failed: + search_start = stripe_align(root, search_start); + if (!block_group) { + block_group = btrfs_lookup_first_block_group(info, + search_start); + if (!block_group) + block_group = btrfs_lookup_first_block_group(info, + orig_search_start); + } + ret = find_search_start(root, &block_group, &search_start, + total_needed, data); + if (ret) + goto new_group; + + ins->objectid = search_start; + ins->offset = num_bytes; + + if (ins->objectid + num_bytes > + block_group->key.objectid + block_group->key.offset) { + search_start = block_group->key.objectid + + block_group->key.offset; + goto new_group; + } + + if (test_range_bit(&info->extent_ins, ins->objectid, + ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) { + search_start = ins->objectid + num_bytes; + goto new_group; + } + + if (test_range_bit(&info->pinned_extents, ins->objectid, + ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) { + search_start = ins->objectid + num_bytes; + goto new_group; + } + + if (info->excluded_extents && + test_range_bit(info->excluded_extents, ins->objectid, + ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) { + search_start = ins->objectid + num_bytes; + goto new_group; + } + + if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start && + ins->objectid < exclude_start + exclude_nr)) { + search_start = exclude_start + exclude_nr; + goto new_group; + } + + if (!(data & BTRFS_BLOCK_GROUP_DATA)) { + if (check_crossing_stripes(info, ins->objectid, num_bytes)) { + struct btrfs_block_group_cache *bg_cache; + u64 bg_offset; + + bg_cache = btrfs_lookup_block_group(info, ins->objectid); + if (!bg_cache) + goto no_bg_cache; + bg_offset = ins->objectid - bg_cache->key.objectid; + + search_start = round_up(bg_offset + num_bytes, + BTRFS_STRIPE_LEN) + bg_offset; + goto new_group; + } +no_bg_cache: + block_group = btrfs_lookup_block_group(info, ins->objectid); + if (block_group) + trans->block_group = block_group; + } + ins->offset = num_bytes; + return 0; + +new_group: + block_group = btrfs_lookup_first_block_group(info, search_start); + if (!block_group) { + search_start = orig_search_start; + if (full_scan) { + ret = -ENOSPC; + goto error; + } + if (wrapped) { + if (!full_scan) + total_needed -= empty_size; + full_scan = 1; + } else + wrapped = 1; + } + cond_resched(); + block_group = btrfs_find_block_group(root, block_group, + search_start, data, 0); + goto check_failed; + +error: + return ret; +} + +int btrfs_reserve_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 num_bytes, u64 empty_size, + u64 hint_byte, u64 search_end, + struct btrfs_key *ins, int data) +{ + int ret; + u64 search_start = 0; + u64 alloc_profile; + struct btrfs_fs_info *info = root->fs_info; + + if (data) { + alloc_profile = info->avail_data_alloc_bits & + info->data_alloc_profile; + data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; + } else if ((info->system_allocs > 0 || root == info->chunk_root) && + info->system_allocs >= 0) { + alloc_profile = info->avail_system_alloc_bits & + info->system_alloc_profile; + data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; + } else { + alloc_profile = info->avail_metadata_alloc_bits & + info->metadata_alloc_profile; + data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; + } + + if (root->ref_cows) { + if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { + ret = do_chunk_alloc(trans, root->fs_info->extent_root, + num_bytes, + BTRFS_BLOCK_GROUP_METADATA); + BUG_ON(ret); + } + ret = do_chunk_alloc(trans, root->fs_info->extent_root, + num_bytes + 2 * 1024 * 1024, data); + BUG_ON(ret); + } + + WARN_ON(num_bytes < root->sectorsize); + ret = find_free_extent(trans, root, num_bytes, empty_size, + search_start, search_end, hint_byte, ins, + trans->alloc_exclude_start, + trans->alloc_exclude_nr, data); + BUG_ON(ret); + clear_extent_dirty(&root->fs_info->free_space_cache, + ins->objectid, ins->objectid + ins->offset - 1, + GFP_NOFS); + return ret; +} + +static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 root_objectid, u64 generation, + u64 flags, struct btrfs_disk_key *key, + int level, struct btrfs_key *ins) +{ + int ret; + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_extent_item *extent_item; + struct btrfs_tree_block_info *block_info; + struct btrfs_extent_inline_ref *iref; + struct btrfs_path *path; + struct extent_buffer *leaf; + u32 size = sizeof(*extent_item) + sizeof(*iref); + int skinny_metadata = + btrfs_fs_incompat(fs_info, + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA); + + if (!skinny_metadata) + size += sizeof(*block_info); + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, + ins, size); + BUG_ON(ret); + + leaf = path->nodes[0]; + extent_item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_item); + btrfs_set_extent_refs(leaf, extent_item, 1); + btrfs_set_extent_generation(leaf, extent_item, generation); + btrfs_set_extent_flags(leaf, extent_item, + flags | BTRFS_EXTENT_FLAG_TREE_BLOCK); + + if (skinny_metadata) { + iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); + } else { + block_info = (struct btrfs_tree_block_info *)(extent_item + 1); + btrfs_set_tree_block_key(leaf, block_info, key); + btrfs_set_tree_block_level(leaf, block_info, level); + iref = (struct btrfs_extent_inline_ref *)(block_info + 1); + } + + btrfs_set_extent_inline_ref_type(leaf, iref, BTRFS_TREE_BLOCK_REF_KEY); + btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); + + btrfs_mark_buffer_dirty(leaf); + btrfs_free_path(path); + + ret = update_block_group(trans, root, ins->objectid, root->nodesize, + 1, 0); + return ret; +} + +static int alloc_tree_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 num_bytes, + u64 root_objectid, u64 generation, + u64 flags, struct btrfs_disk_key *key, + int level, u64 empty_size, u64 hint_byte, + u64 search_end, struct btrfs_key *ins) +{ + int ret; + ret = btrfs_reserve_extent(trans, root, num_bytes, empty_size, + hint_byte, search_end, ins, 0); + BUG_ON(ret); + + if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID) { + struct pending_extent_op *extent_op; + + extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); + BUG_ON(!extent_op); + + extent_op->type = PENDING_EXTENT_INSERT; + extent_op->bytenr = ins->objectid; + extent_op->num_bytes = ins->offset; + extent_op->level = level; + extent_op->flags = flags; + memcpy(&extent_op->key, key, sizeof(*key)); + + set_extent_bits(&root->fs_info->extent_ins, ins->objectid, + ins->objectid + ins->offset - 1, + EXTENT_LOCKED, GFP_NOFS); + set_state_private(&root->fs_info->extent_ins, + ins->objectid, (unsigned long)extent_op); + } else { + if (btrfs_fs_incompat(root->fs_info, + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)) { + ins->offset = level; + ins->type = BTRFS_METADATA_ITEM_KEY; + } + ret = alloc_reserved_tree_block(trans, root, root_objectid, + generation, flags, + key, level, ins); + finish_current_insert(trans, root->fs_info->extent_root); + del_pending_extents(trans, root->fs_info->extent_root); + } + return ret; +} + +/* + * helper function to allocate a block for a given tree + * returns the tree buffer or NULL. + */ +struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u32 blocksize, u64 root_objectid, + struct btrfs_disk_key *key, int level, + u64 hint, u64 empty_size) +{ + struct btrfs_key ins; + int ret; + struct extent_buffer *buf; + + ret = alloc_tree_block(trans, root, blocksize, root_objectid, + trans->transid, 0, key, level, + empty_size, hint, (u64)-1, &ins); + if (ret) { + BUG_ON(ret > 0); + return ERR_PTR(ret); + } + + buf = btrfs_find_create_tree_block(root->fs_info, ins.objectid, + blocksize); + if (!buf) { + btrfs_free_extent(trans, root, ins.objectid, ins.offset, + 0, root->root_key.objectid, level, 0); + BUG_ON(1); + return ERR_PTR(-ENOMEM); + } + btrfs_set_buffer_uptodate(buf); + trans->blocks_used++; + + return buf; +} + +#if 0 + +static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *leaf) +{ + u64 leaf_owner; + u64 leaf_generation; + struct btrfs_key key; + struct btrfs_file_extent_item *fi; + int i; + int nritems; + int ret; + + BUG_ON(!btrfs_is_leaf(leaf)); + nritems = btrfs_header_nritems(leaf); + leaf_owner = btrfs_header_owner(leaf); + leaf_generation = btrfs_header_generation(leaf); + + for (i = 0; i < nritems; i++) { + u64 disk_bytenr; + + btrfs_item_key_to_cpu(leaf, &key, i); if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); @@ -1851,505 +2888,1379 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, continue; ret = btrfs_free_extent(trans, root, disk_bytenr, btrfs_file_extent_disk_num_bytes(leaf, fi), - leaf_owner, leaf_generation, - key.objectid, key.offset, 0); + leaf->start, leaf_owner, leaf_generation, + key.objectid, 0); + BUG_ON(ret); + } + return 0; +} + +static void noinline reada_walk_down(struct btrfs_root *root, + struct extent_buffer *node, + int slot) +{ + u64 bytenr; + u64 last = 0; + u32 nritems; + u32 refs; + u32 blocksize; + int ret; + int i; + int level; + int skipped = 0; + + nritems = btrfs_header_nritems(node); + level = btrfs_header_level(node); + if (level) + return; + + for (i = slot; i < nritems && skipped < 32; i++) { + bytenr = btrfs_node_blockptr(node, i); + if (last && ((bytenr > last && bytenr - last > 32 * 1024) || + (last > bytenr && last - bytenr > 32 * 1024))) { + skipped++; + continue; + } + blocksize = btrfs_level_size(root, level - 1); + if (i != slot) { + ret = btrfs_lookup_extent_ref(NULL, root, bytenr, + blocksize, &refs); + BUG_ON(ret); + if (refs != 1) { + skipped++; + continue; + } + } + mutex_unlock(&root->fs_info->fs_mutex); + ret = readahead_tree_block(root, bytenr, blocksize, + btrfs_node_ptr_generation(node, i)); + last = bytenr + blocksize; + cond_resched(); + mutex_lock(&root->fs_info->fs_mutex); + if (ret) + break; + } +} + +/* + * helper function for drop_snapshot, this walks down the tree dropping ref + * counts as it goes. + */ +static int noinline walk_down_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int *level) +{ + u64 root_owner; + u64 root_gen; + u64 bytenr; + u64 ptr_gen; + struct extent_buffer *next; + struct extent_buffer *cur; + struct extent_buffer *parent; + u32 blocksize; + int ret; + u32 refs; + + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + ret = btrfs_lookup_extent_ref(trans, root, + path->nodes[*level]->start, + path->nodes[*level]->len, &refs); + BUG_ON(ret); + if (refs > 1) + goto out; + + /* + * walk down to the last node level and free all the leaves + */ + while(*level >= 0) { + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + cur = path->nodes[*level]; + + if (btrfs_header_level(cur) != *level) + WARN_ON(1); + + if (path->slots[*level] >= + btrfs_header_nritems(cur)) + break; + if (*level == 0) { + ret = drop_leaf_ref(trans, root, cur); + BUG_ON(ret); + break; + } + bytenr = btrfs_node_blockptr(cur, path->slots[*level]); + ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); + blocksize = btrfs_level_size(root, *level - 1); + ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize, + &refs); + BUG_ON(ret); + if (refs != 1) { + parent = path->nodes[*level]; + root_owner = btrfs_header_owner(parent); + root_gen = btrfs_header_generation(parent); + path->slots[*level]++; + ret = btrfs_free_extent(trans, root, bytenr, blocksize, + parent->start, root_owner, + root_gen, *level - 1, 1); + BUG_ON(ret); + continue; + } + next = btrfs_find_tree_block(root, bytenr, blocksize); + if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { + free_extent_buffer(next); + reada_walk_down(root, cur, path->slots[*level]); + mutex_unlock(&root->fs_info->fs_mutex); + next = read_tree_block(root, bytenr, blocksize, + ptr_gen); + mutex_lock(&root->fs_info->fs_mutex); + if (!extent_buffer_uptodate(next)) { + if (IS_ERR(next)) + ret = PTR_ERR(next); + else + ret = -EIO; + break; + } + } + WARN_ON(*level <= 0); + if (path->nodes[*level-1]) + free_extent_buffer(path->nodes[*level-1]); + path->nodes[*level-1] = next; + *level = btrfs_header_level(next); + path->slots[*level] = 0; + } +out: + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + + if (path->nodes[*level] == root->node) { + root_owner = root->root_key.objectid; + parent = path->nodes[*level]; + } else { + parent = path->nodes[*level + 1]; + root_owner = btrfs_header_owner(parent); + } + + root_gen = btrfs_header_generation(parent); + ret = btrfs_free_extent(trans, root, path->nodes[*level]->start, + path->nodes[*level]->len, parent->start, + root_owner, root_gen, *level, 1); + free_extent_buffer(path->nodes[*level]); + path->nodes[*level] = NULL; + *level += 1; + BUG_ON(ret); + return 0; +} + +/* + * helper for dropping snapshots. This walks back up the tree in the path + * to find the first node higher up where we haven't yet gone through + * all the slots + */ +static int noinline walk_up_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int *level) +{ + u64 root_owner; + u64 root_gen; + struct btrfs_root_item *root_item = &root->root_item; + int i; + int slot; + int ret; + + for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { + slot = path->slots[i]; + if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { + struct extent_buffer *node; + struct btrfs_disk_key disk_key; + node = path->nodes[i]; + path->slots[i]++; + *level = i; + WARN_ON(*level == 0); + btrfs_node_key(node, &disk_key, path->slots[i]); + memcpy(&root_item->drop_progress, + &disk_key, sizeof(disk_key)); + root_item->drop_level = i; + return 0; + } else { + struct extent_buffer *parent; + if (path->nodes[*level] == root->node) + parent = path->nodes[*level]; + else + parent = path->nodes[*level + 1]; + + root_owner = btrfs_header_owner(parent); + root_gen = btrfs_header_generation(parent); + ret = btrfs_free_extent(trans, root, + path->nodes[*level]->start, + path->nodes[*level]->len, + parent->start, root_owner, + root_gen, *level, 1); + BUG_ON(ret); + free_extent_buffer(path->nodes[*level]); + path->nodes[*level] = NULL; + *level = i + 1; + } + } + return 1; +} + +#endif + +int btrfs_free_block_groups(struct btrfs_fs_info *info) +{ + struct btrfs_space_info *sinfo; + struct btrfs_block_group_cache *cache; + u64 start; + u64 end; + u64 ptr; + int ret; + + while(1) { + ret = find_first_extent_bit(&info->block_group_cache, 0, + &start, &end, (unsigned int)-1); + if (ret) + break; + ret = get_state_private(&info->block_group_cache, start, &ptr); + if (!ret) { + cache = u64_to_ptr(ptr); + if (cache->free_space_ctl) { + btrfs_remove_free_space_cache(cache); + kfree(cache->free_space_ctl); + } + kfree(cache); + } + clear_extent_bits(&info->block_group_cache, start, + end, (unsigned int)-1, GFP_NOFS); + } + while(1) { + ret = find_first_extent_bit(&info->free_space_cache, 0, + &start, &end, EXTENT_DIRTY); + if (ret) + break; + clear_extent_dirty(&info->free_space_cache, start, + end, GFP_NOFS); + } + + while (!list_empty(&info->space_info)) { + sinfo = list_entry(info->space_info.next, + struct btrfs_space_info, list); + list_del_init(&sinfo->list); + kfree(sinfo); + } + return 0; +} + +static int find_first_block_group(struct btrfs_root *root, + struct btrfs_path *path, struct btrfs_key *key) +{ + int ret; + struct btrfs_key found_key; + struct extent_buffer *leaf; + int slot; + + ret = btrfs_search_slot(NULL, root, key, path, 0, 0); + if (ret < 0) + return ret; + while(1) { + slot = path->slots[0]; + leaf = path->nodes[0]; + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret == 0) + continue; + if (ret < 0) + goto error; + break; + } + btrfs_item_key_to_cpu(leaf, &found_key, slot); + + if (found_key.objectid >= key->objectid && + found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) + return 0; + path->slots[0]++; + } + ret = -ENOENT; +error: + return ret; +} + +static void account_super_bytes(struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *cache) +{ + u64 bytenr; + u64 *logical; + int stripe_len; + int i, nr, ret; + + if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) { + stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid; + cache->bytes_super += stripe_len; + } + + for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { + bytenr = btrfs_sb_offset(i); + ret = btrfs_rmap_block(&fs_info->mapping_tree, + cache->key.objectid, bytenr, + 0, &logical, &nr, &stripe_len); + if (ret) + return; + + while (nr--) { + u64 start, len; + + if (logical[nr] > cache->key.objectid + + cache->key.offset) + continue; + + if (logical[nr] + stripe_len <= cache->key.objectid) + continue; + + start = logical[nr]; + if (start < cache->key.objectid) { + start = cache->key.objectid; + len = (logical[nr] + stripe_len) - start; + } else { + len = min_t(u64, stripe_len, + cache->key.objectid + + cache->key.offset - start); + } + + cache->bytes_super += len; + } + + kfree(logical); + } +} + +int btrfs_read_block_groups(struct btrfs_root *root) +{ + struct btrfs_path *path; + int ret; + int bit; + struct btrfs_block_group_cache *cache; + struct btrfs_fs_info *info = root->fs_info; + struct btrfs_space_info *space_info; + struct extent_io_tree *block_group_cache; + struct btrfs_key key; + struct btrfs_key found_key; + struct extent_buffer *leaf; + + block_group_cache = &info->block_group_cache; + + root = info->extent_root; + key.objectid = 0; + key.offset = 0; + key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + while(1) { + ret = find_first_block_group(root, path, &key); + if (ret > 0) { + ret = 0; + goto error; + } + if (ret != 0) { + goto error; + } + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + cache = kzalloc(sizeof(*cache), GFP_NOFS); + if (!cache) { + ret = -ENOMEM; + goto error; + } + + read_extent_buffer(leaf, &cache->item, + btrfs_item_ptr_offset(leaf, path->slots[0]), + sizeof(cache->item)); + memcpy(&cache->key, &found_key, sizeof(found_key)); + cache->cached = 0; + cache->pinned = 0; + key.objectid = found_key.objectid + found_key.offset; + if (found_key.offset == 0) + key.objectid++; + btrfs_release_path(path); + cache->flags = btrfs_block_group_flags(&cache->item); + bit = 0; + if (cache->flags & BTRFS_BLOCK_GROUP_DATA) { + bit = BLOCK_GROUP_DATA; + } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) { + bit = BLOCK_GROUP_SYSTEM; + } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { + bit = BLOCK_GROUP_METADATA; + } + set_avail_alloc_bits(info, cache->flags); + if (btrfs_chunk_readonly(root, cache->key.objectid)) + cache->ro = 1; + + account_super_bytes(info, cache); + + ret = update_space_info(info, cache->flags, found_key.offset, + btrfs_block_group_used(&cache->item), + &space_info); + BUG_ON(ret); + cache->space_info = space_info; + + /* use EXTENT_LOCKED to prevent merging */ + set_extent_bits(block_group_cache, found_key.objectid, + found_key.objectid + found_key.offset - 1, + bit | EXTENT_LOCKED, GFP_NOFS); + set_state_private(block_group_cache, found_key.objectid, + (unsigned long)cache); + } + ret = 0; +error: + btrfs_free_path(path); + return ret; +} + +struct btrfs_block_group_cache * +btrfs_add_block_group(struct btrfs_fs_info *fs_info, u64 bytes_used, u64 type, + u64 chunk_objectid, u64 chunk_offset, u64 size) +{ + int ret; + int bit = 0; + struct btrfs_block_group_cache *cache; + struct extent_io_tree *block_group_cache; + + block_group_cache = &fs_info->block_group_cache; + + cache = kzalloc(sizeof(*cache), GFP_NOFS); + BUG_ON(!cache); + cache->key.objectid = chunk_offset; + cache->key.offset = size; + + cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; + btrfs_set_block_group_used(&cache->item, bytes_used); + btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); + cache->flags = type; + btrfs_set_block_group_flags(&cache->item, type); + + account_super_bytes(fs_info, cache); + ret = update_space_info(fs_info, cache->flags, size, bytes_used, + &cache->space_info); + BUG_ON(ret); + + bit = block_group_state_bits(type); + ret = set_extent_bits(block_group_cache, chunk_offset, + chunk_offset + size - 1, + bit | EXTENT_LOCKED, GFP_NOFS); + BUG_ON(ret); + + ret = set_state_private(block_group_cache, chunk_offset, + (unsigned long)cache); + BUG_ON(ret); + set_avail_alloc_bits(fs_info, type); + + return cache; +} + +int btrfs_make_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytes_used, + u64 type, u64 chunk_objectid, u64 chunk_offset, + u64 size) +{ + int ret; + struct btrfs_root *extent_root; + struct btrfs_block_group_cache *cache; + + cache = btrfs_add_block_group(root->fs_info, bytes_used, type, + chunk_objectid, chunk_offset, size); + extent_root = root->fs_info->extent_root; + ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, + sizeof(cache->item)); + BUG_ON(ret); + + ret = finish_current_insert(trans, extent_root); + BUG_ON(ret); + ret = del_pending_extents(trans, extent_root); + BUG_ON(ret); + + return 0; +} + +/* + * This is for converter use only. + * + * In that case, we don't know where are free blocks located. + * Therefore all block group cache entries must be setup properly + * before doing any block allocation. + */ +int btrfs_make_block_groups(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + u64 total_bytes; + u64 cur_start; + u64 group_type; + u64 group_size; + u64 group_align; + u64 total_data = 0; + u64 total_metadata = 0; + u64 chunk_objectid; + int ret; + int bit; + struct btrfs_root *extent_root; + struct btrfs_block_group_cache *cache; + struct extent_io_tree *block_group_cache; + + extent_root = root->fs_info->extent_root; + block_group_cache = &root->fs_info->block_group_cache; + chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy); + group_align = 64 * root->sectorsize; + + cur_start = 0; + while (cur_start < total_bytes) { + group_size = total_bytes / 12; + group_size = min_t(u64, group_size, total_bytes - cur_start); + if (cur_start == 0) { + bit = BLOCK_GROUP_SYSTEM; + group_type = BTRFS_BLOCK_GROUP_SYSTEM; + group_size /= 4; + group_size &= ~(group_align - 1); + group_size = max_t(u64, group_size, 8 * 1024 * 1024); + group_size = min_t(u64, group_size, 32 * 1024 * 1024); + } else { + group_size &= ~(group_align - 1); + if (total_data >= total_metadata * 2) { + group_type = BTRFS_BLOCK_GROUP_METADATA; + group_size = min_t(u64, group_size, + 1ULL * 1024 * 1024 * 1024); + total_metadata += group_size; + } else { + group_type = BTRFS_BLOCK_GROUP_DATA; + group_size = min_t(u64, group_size, + 5ULL * 1024 * 1024 * 1024); + total_data += group_size; + } + if ((total_bytes - cur_start) * 4 < group_size * 5) + group_size = total_bytes - cur_start; + } + + cache = kzalloc(sizeof(*cache), GFP_NOFS); + BUG_ON(!cache); + + cache->key.objectid = cur_start; + cache->key.offset = group_size; + cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; + + btrfs_set_block_group_used(&cache->item, 0); + btrfs_set_block_group_chunk_objectid(&cache->item, + chunk_objectid); + btrfs_set_block_group_flags(&cache->item, group_type); + + cache->flags = group_type; + + ret = update_space_info(root->fs_info, group_type, group_size, + 0, &cache->space_info); + BUG_ON(ret); + set_avail_alloc_bits(extent_root->fs_info, group_type); + + set_extent_bits(block_group_cache, cur_start, + cur_start + group_size - 1, + bit | EXTENT_LOCKED, GFP_NOFS); + set_state_private(block_group_cache, cur_start, + (unsigned long)cache); + cur_start += group_size; + } + /* then insert all the items */ + cur_start = 0; + while(cur_start < total_bytes) { + cache = btrfs_lookup_block_group(root->fs_info, cur_start); + BUG_ON(!cache); + + ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, + sizeof(cache->item)); + BUG_ON(ret); + + finish_current_insert(trans, extent_root); + ret = del_pending_extents(trans, extent_root); BUG_ON(ret); + + cur_start = cache->key.objectid + cache->key.offset; } return 0; } -static void noinline reada_walk_down(struct btrfs_root *root, - struct extent_buffer *node, - int slot) +int btrfs_update_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, int alloc, + int mark_free) +{ + return update_block_group(trans, root, bytenr, num_bytes, + alloc, mark_free); +} + +/* + * Just remove a block group item in extent tree + * Caller should ensure the block group is empty and all space is pinned. + * Or new tree block/data may be allocated into it. + */ +static int free_block_group_item(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 bytenr, u64 len) +{ + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_root *root = fs_info->extent_root; + int ret = 0; + + key.objectid = bytenr; + key.offset = len; + key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) { + ret = -ENOENT; + goto out; + } + if (ret < 0) + goto out; + + ret = btrfs_del_item(trans, root, path); +out: + btrfs_free_path(path); + return ret; +} + +static int free_dev_extent_item(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 devid, u64 dev_offset) +{ + struct btrfs_root *root = fs_info->dev_root; + struct btrfs_path *path; + struct btrfs_key key; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = devid; + key.type = BTRFS_DEV_EXTENT_KEY; + key.offset = dev_offset; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) + goto out; + if (ret > 0) { + ret = -ENOENT; + goto out; + } + + ret = btrfs_del_item(trans, root, path); +out: + btrfs_free_path(path); + return ret; +} + +static int free_chunk_dev_extent_items(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 chunk_offset) { - u64 bytenr; - u64 last = 0; - u32 nritems; - u32 refs; - u32 blocksize; - int ret; + struct btrfs_chunk *chunk = NULL; + struct btrfs_root *root= fs_info->chunk_root; + struct btrfs_path *path; + struct btrfs_key key; + u16 num_stripes; int i; - int level; - int skipped = 0; + int ret; - nritems = btrfs_header_nritems(node); - level = btrfs_header_level(node); - if (level) - return; + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; - for (i = slot; i < nritems && skipped < 32; i++) { - bytenr = btrfs_node_blockptr(node, i); - if (last && ((bytenr > last && bytenr - last > 32 * 1024) || - (last > bytenr && last - bytenr > 32 * 1024))) { - skipped++; - continue; + key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + key.type = BTRFS_CHUNK_ITEM_KEY; + key.offset = chunk_offset; + + ret = btrfs_search_slot(trans, root, &key, path, 0, 0); + if (ret < 0) + goto out; + if (ret > 0) { + ret = -ENOENT; + goto out; + } + chunk = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_chunk); + num_stripes = btrfs_chunk_num_stripes(path->nodes[0], chunk); + for (i = 0; i < num_stripes; i++) { + ret = free_dev_extent_item(trans, fs_info, + btrfs_stripe_devid_nr(path->nodes[0], chunk, i), + btrfs_stripe_offset_nr(path->nodes[0], chunk, i)); + if (ret < 0) + goto out; + } +out: + btrfs_free_path(path); + return ret; +} + +static int free_system_chunk_item(struct btrfs_super_block *super, + struct btrfs_key *key) +{ + struct btrfs_disk_key *disk_key; + struct btrfs_key cpu_key; + u32 array_size = btrfs_super_sys_array_size(super); + char *ptr = (char *)super->sys_chunk_array; + int cur = 0; + int ret = -ENOENT; + + while (cur < btrfs_super_sys_array_size(super)) { + struct btrfs_chunk *chunk; + u32 num_stripes; + u32 chunk_len; + + disk_key = (struct btrfs_disk_key *)(ptr + cur); + btrfs_disk_key_to_cpu(&cpu_key, disk_key); + if (cpu_key.type != BTRFS_CHUNK_ITEM_KEY) { + /* just in case */ + ret = -EIO; + goto out; } - blocksize = btrfs_level_size(root, level - 1); - if (i != slot) { - ret = lookup_extent_ref(NULL, root, bytenr, - blocksize, &refs); - BUG_ON(ret); - if (refs != 1) { - skipped++; - continue; - } + + chunk = (struct btrfs_chunk *)(ptr + cur + sizeof(*disk_key)); + num_stripes = btrfs_stack_chunk_num_stripes(chunk); + chunk_len = btrfs_chunk_item_size(num_stripes) + + sizeof(*disk_key); + + if (key->objectid == cpu_key.objectid && + key->offset == cpu_key.offset && + key->type == cpu_key.type) { + memmove(ptr + cur, ptr + cur + chunk_len, + array_size - cur - chunk_len); + array_size -= chunk_len; + btrfs_set_super_sys_array_size(super, array_size); + ret = 0; + goto out; } - mutex_unlock(&root->fs_info->fs_mutex); - ret = readahead_tree_block(root, bytenr, blocksize); - last = bytenr + blocksize; - cond_resched(); - mutex_lock(&root->fs_info->fs_mutex); - if (ret) - break; + + cur += chunk_len; } +out: + return ret; } -/* - * helper function for drop_snapshot, this walks down the tree dropping ref - * counts as it goes. - */ -static int noinline walk_down_tree(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, int *level) +static int free_chunk_item(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 bytenr, u64 len) { - u64 root_owner; - u64 root_gen; - u64 bytenr; - struct extent_buffer *next; - struct extent_buffer *cur; - struct extent_buffer *parent; - u32 blocksize; + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_root *root = fs_info->chunk_root; + struct btrfs_chunk *chunk; + u64 chunk_type; int ret; - u32 refs; - WARN_ON(*level < 0); - WARN_ON(*level >= BTRFS_MAX_LEVEL); - ret = lookup_extent_ref(trans, root, - path->nodes[*level]->start, - path->nodes[*level]->len, &refs); - BUG_ON(ret); - if (refs > 1) - goto out; + key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + key.offset = bytenr; + key.type = BTRFS_CHUNK_ITEM_KEY; - /* - * walk down to the last node level and free all the leaves - */ - while(*level >= 0) { - WARN_ON(*level < 0); - WARN_ON(*level >= BTRFS_MAX_LEVEL); - cur = path->nodes[*level]; + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; - if (btrfs_header_level(cur) != *level) - WARN_ON(1); + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) { + ret = -ENOENT; + goto out; + } + if (ret < 0) + goto out; + chunk = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_chunk); + chunk_type = btrfs_chunk_type(path->nodes[0], chunk); - if (path->slots[*level] >= - btrfs_header_nritems(cur)) - break; - if (*level == 0) { - ret = drop_leaf_ref(trans, root, cur); - BUG_ON(ret); - break; - } - bytenr = btrfs_node_blockptr(cur, path->slots[*level]); - blocksize = btrfs_level_size(root, *level - 1); - ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs); - BUG_ON(ret); - if (refs != 1) { - parent = path->nodes[*level]; - root_owner = btrfs_header_owner(parent); - root_gen = btrfs_header_generation(parent); - path->slots[*level]++; - ret = btrfs_free_extent(trans, root, bytenr, - blocksize, root_owner, - root_gen, 0, 0, 1); - BUG_ON(ret); - continue; - } - next = btrfs_find_tree_block(root, bytenr, blocksize); - if (!next || !btrfs_buffer_uptodate(next)) { - free_extent_buffer(next); - reada_walk_down(root, cur, path->slots[*level]); - mutex_unlock(&root->fs_info->fs_mutex); - next = read_tree_block(root, bytenr, blocksize); - mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_del_item(trans, root, path); + if (ret < 0) + goto out; - /* we dropped the lock, check one more time */ - ret = lookup_extent_ref(trans, root, bytenr, - blocksize, &refs); - BUG_ON(ret); - if (refs != 1) { - parent = path->nodes[*level]; - root_owner = btrfs_header_owner(parent); - root_gen = btrfs_header_generation(parent); - - path->slots[*level]++; - free_extent_buffer(next); - ret = btrfs_free_extent(trans, root, bytenr, - blocksize, - root_owner, - root_gen, 0, 0, 1); - BUG_ON(ret); - continue; - } - } - WARN_ON(*level <= 0); - if (path->nodes[*level-1]) - free_extent_buffer(path->nodes[*level-1]); - path->nodes[*level-1] = next; - *level = btrfs_header_level(next); - path->slots[*level] = 0; - } + if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) + ret = free_system_chunk_item(fs_info->super_copy, &key); out: - WARN_ON(*level < 0); - WARN_ON(*level >= BTRFS_MAX_LEVEL); + btrfs_free_path(path); + return ret; +} - if (path->nodes[*level] == root->node) { - root_owner = root->root_key.objectid; - parent = path->nodes[*level]; - } else { - parent = path->nodes[*level + 1]; - root_owner = btrfs_header_owner(parent); - } +static u64 get_dev_extent_len(struct map_lookup *map) +{ + int div; - root_gen = btrfs_header_generation(parent); - ret = btrfs_free_extent(trans, root, path->nodes[*level]->start, - path->nodes[*level]->len, - root_owner, root_gen, 0, 0, 1); - free_extent_buffer(path->nodes[*level]); - path->nodes[*level] = NULL; - *level += 1; - BUG_ON(ret); - return 0; + switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { + case 0: /* Single */ + case BTRFS_BLOCK_GROUP_DUP: + case BTRFS_BLOCK_GROUP_RAID1: + div = 1; + break; + case BTRFS_BLOCK_GROUP_RAID5: + div = (map->num_stripes - 1); + break; + case BTRFS_BLOCK_GROUP_RAID6: + div = (map->num_stripes - 2); + break; + case BTRFS_BLOCK_GROUP_RAID10: + div = (map->num_stripes / map->sub_stripes); + break; + default: + /* normally, read chunk security hook should handled it */ + BUG_ON(1); + } + return map->ce.size / div; } -/* - * helper for dropping snapshots. This walks back up the tree in the path - * to find the first node higher up where we haven't yet gone through - * all the slots - */ -static int noinline walk_up_tree(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, int *level) +/* free block group/chunk related caches */ +static int free_block_group_cache(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 bytenr, u64 len) { - u64 root_owner; - u64 root_gen; - struct btrfs_root_item *root_item = &root->root_item; - int i; - int slot; + struct btrfs_block_group_cache *cache; + struct cache_extent *ce; + struct map_lookup *map; int ret; + int i; + u64 flags; - for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { - slot = path->slots[i]; - if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { - struct extent_buffer *node; - struct btrfs_disk_key disk_key; - node = path->nodes[i]; - path->slots[i]++; - *level = i; - WARN_ON(*level == 0); - btrfs_node_key(node, &disk_key, path->slots[i]); - memcpy(&root_item->drop_progress, - &disk_key, sizeof(disk_key)); - root_item->drop_level = i; - return 0; - } else { - if (path->nodes[*level] == root->node) { - root_owner = root->root_key.objectid; - root_gen = - btrfs_header_generation(path->nodes[*level]); - } else { - struct extent_buffer *node; - node = path->nodes[*level + 1]; - root_owner = btrfs_header_owner(node); - root_gen = btrfs_header_generation(node); - } - ret = btrfs_free_extent(trans, root, - path->nodes[*level]->start, - path->nodes[*level]->len, - root_owner, root_gen, 0, 0, 1); - BUG_ON(ret); - free_extent_buffer(path->nodes[*level]); - path->nodes[*level] = NULL; - *level = i + 1; - } + /* Free block group cache first */ + cache = btrfs_lookup_block_group(fs_info, bytenr); + if (!cache) + return -ENOENT; + flags = cache->flags; + if (cache->free_space_ctl) { + btrfs_remove_free_space_cache(cache); + kfree(cache->free_space_ctl); } - return 1; + clear_extent_bits(&fs_info->block_group_cache, bytenr, bytenr + len, + (unsigned int)-1, GFP_NOFS); + ret = free_space_info(fs_info, flags, len, 0, NULL); + if (ret < 0) + goto out; + kfree(cache); + + /* Then free mapping info and dev usage info */ + ce = search_cache_extent(&fs_info->mapping_tree.cache_tree, bytenr); + if (!ce || ce->start != bytenr) { + ret = -ENOENT; + goto out; + } + map = container_of(ce, struct map_lookup, ce); + for (i = 0; i < map->num_stripes; i++) { + struct btrfs_device *device; + + device = map->stripes[i].dev; + device->bytes_used -= get_dev_extent_len(map); + ret = btrfs_update_device(trans, device); + if (ret < 0) + goto out; + } + remove_cache_extent(&fs_info->mapping_tree.cache_tree, ce); + free(map); +out: + return ret; } -/* - * drop the reference count on the tree rooted at 'snap'. This traverses - * the tree freeing any blocks that have a ref count of zero after being - * decremented. - */ -int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root - *root) +int btrfs_free_block_group(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 bytenr, u64 len) { - int ret = 0; - int wret; - int level; + struct btrfs_root *extent_root = fs_info->extent_root; struct btrfs_path *path; - int i; - int orig_level; - struct btrfs_root_item *root_item = &root->root_item; + struct btrfs_block_group_item *bgi; + struct btrfs_key key; + int ret = 0; path = btrfs_alloc_path(); - BUG_ON(!path); - - level = btrfs_header_level(root->node); - orig_level = level; - if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { - path->nodes[level] = root->node; - extent_buffer_get(root->node); - path->slots[level] = 0; - } else { - struct btrfs_key key; - struct btrfs_disk_key found_key; - struct extent_buffer *node; - - btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); - level = root_item->drop_level; - path->lowest_level = level; - wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (wret < 0) { - ret = wret; - goto out; - } - node = path->nodes[level]; - btrfs_node_key(node, &found_key, path->slots[level]); - WARN_ON(memcmp(&found_key, &root_item->drop_progress, - sizeof(found_key))); + if (!path) + return -ENOMEM; + + key.objectid = bytenr; + key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; + key.offset = len; + + /* Double check the block group to ensure it's empty */ + ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); + if (ret > 0) { + ret = -ENONET; + goto out; } - while(1) { - wret = walk_down_tree(trans, root, path, &level); - if (wret < 0) - ret = wret; - if (wret != 0) - break; + if (ret < 0) + goto out; - wret = walk_up_tree(trans, root, path, &level); - if (wret < 0) - ret = wret; - if (wret != 0) - break; - /* - ret = -EAGAIN; - break; - */ + bgi = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_block_group_item); + if (btrfs_disk_block_group_used(path->nodes[0], bgi)) { + fprintf(stderr, + "WARNING: block group [%llu,%llu) is not empty\n", + bytenr, bytenr + len); + ret = -EINVAL; + goto out; } - for (i = 0; i <= orig_level; i++) { - if (path->nodes[i]) { - free_extent_buffer(path->nodes[i]); - path->nodes[i] = NULL; - } + btrfs_release_path(path); + + /* + * Now pin all space in the block group, to prevent further transaction + * allocate space from it. + * Every operation needs a transaction must be in the range. + */ + btrfs_pin_extent(fs_info, bytenr, len); + + /* delete block group item and chunk item */ + ret = free_block_group_item(trans, fs_info, bytenr, len); + if (ret < 0) { + fprintf(stderr, + "failed to free block group item for [%llu,%llu)\n", + bytenr, bytenr + len); + btrfs_unpin_extent(fs_info, bytenr, len); + goto out; + } + + ret = free_chunk_dev_extent_items(trans, fs_info, bytenr); + if (ret < 0) { + fprintf(stderr, + "failed to dev extents belongs to [%llu,%llu)\n", + bytenr, bytenr + len); + btrfs_unpin_extent(fs_info, bytenr, len); + goto out; } + ret = free_chunk_item(trans, fs_info, bytenr, len); + if (ret < 0) { + fprintf(stderr, + "failed to free chunk for [%llu,%llu)\n", + bytenr, bytenr + len); + btrfs_unpin_extent(fs_info, bytenr, len); + goto out; + } + + /* Now release the block_group_cache */ + ret = free_block_group_cache(trans, fs_info, bytenr, len); + btrfs_unpin_extent(fs_info, bytenr, len); + out: btrfs_free_path(path); return ret; } -int btrfs_free_block_groups(struct btrfs_fs_info *info) +/* + * Fixup block accounting. The initial block accounting created by + * make_block_groups isn't accuracy in this case. + */ +int btrfs_fix_block_accounting(struct btrfs_trans_handle *trans, + struct btrfs_root *root) { - u64 start; - u64 end; - u64 ptr; int ret; - while(1) { - ret = find_first_extent_bit(&info->block_group_cache, 0, - &start, &end, (unsigned int)-1); + int slot; + u64 start = 0; + u64 bytes_used = 0; + struct btrfs_path path; + struct btrfs_key key; + struct extent_buffer *leaf; + struct btrfs_block_group_cache *cache; + struct btrfs_fs_info *fs_info = root->fs_info; + + root = root->fs_info->extent_root; + + while(extent_root_pending_ops(fs_info)) { + ret = finish_current_insert(trans, root); if (ret) - break; - ret = get_state_private(&info->block_group_cache, start, &ptr); - if (!ret) - kfree((void *)(unsigned long)ptr); - clear_extent_bits(&info->block_group_cache, start, - end, (unsigned int)-1, GFP_NOFS); + return ret; + ret = del_pending_extents(trans, root); + if (ret) + return ret; } + while(1) { - ret = find_first_extent_bit(&info->free_space_cache, 0, - &start, &end, EXTENT_DIRTY); - if (ret) + cache = btrfs_lookup_first_block_group(fs_info, start); + if (!cache) break; - clear_extent_dirty(&info->free_space_cache, start, - end, GFP_NOFS); + start = cache->key.objectid + cache->key.offset; + btrfs_set_block_group_used(&cache->item, 0); + cache->space_info->bytes_used = 0; + set_extent_bits(&root->fs_info->block_group_cache, + cache->key.objectid, + cache->key.objectid + cache->key.offset -1, + BLOCK_GROUP_DIRTY, GFP_NOFS); + } + + btrfs_init_path(&path); + key.offset = 0; + key.objectid = 0; + key.type = BTRFS_EXTENT_ITEM_KEY; + ret = btrfs_search_slot(trans, root->fs_info->extent_root, + &key, &path, 0, 0); + if (ret < 0) + return ret; + while(1) { + leaf = path.nodes[0]; + slot = path.slots[0]; + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, &path); + if (ret < 0) + return ret; + if (ret > 0) + break; + leaf = path.nodes[0]; + slot = path.slots[0]; + } + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.type == BTRFS_EXTENT_ITEM_KEY) { + bytes_used += key.offset; + ret = btrfs_update_block_group(trans, root, + key.objectid, key.offset, 1, 0); + BUG_ON(ret); + } else if (key.type == BTRFS_METADATA_ITEM_KEY) { + bytes_used += root->nodesize; + ret = btrfs_update_block_group(trans, root, + key.objectid, root->nodesize, 1, 0); + BUG_ON(ret); + } + path.slots[0]++; } + btrfs_set_super_bytes_used(root->fs_info->super_copy, bytes_used); + btrfs_release_path(&path); return 0; } -int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *key) +static void __get_extent_size(struct btrfs_root *root, struct btrfs_path *path, + u64 *start, u64 *len) +{ + struct btrfs_key key; + + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + BUG_ON(!(key.type == BTRFS_EXTENT_ITEM_KEY || + key.type == BTRFS_METADATA_ITEM_KEY)); + *start = key.objectid; + if (key.type == BTRFS_EXTENT_ITEM_KEY) + *len = key.offset; + else + *len = root->nodesize; +} + +/* + * Find first overlap extent for range [bytenr, bytenr + len) + * Return 0 for found and point path to it. + * Return >0 for not found. + * Return <0 for err + */ +int btrfs_search_overlap_extent(struct btrfs_root *root, + struct btrfs_path *path, u64 bytenr, u64 len) { + struct btrfs_key key; + u64 cur_start; + u64 cur_len; int ret; - struct btrfs_key found_key; - struct extent_buffer *leaf; - int slot; - ret = btrfs_search_slot(NULL, root, key, path, 0, 0); + key.objectid = bytenr; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = (u64)-1; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) return ret; - while(1) { - slot = path->slots[0]; - leaf = path->nodes[0]; - if (slot >= btrfs_header_nritems(leaf)) { - ret = btrfs_next_leaf(root, path); - if (ret == 0) - continue; - if (ret < 0) - goto error; - break; - } - btrfs_item_key_to_cpu(leaf, &found_key, slot); + BUG_ON(ret == 0); - if (found_key.objectid >= key->objectid && - found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) - return 0; - path->slots[0]++; - } - ret = -ENOENT; -error: - return ret; + ret = btrfs_previous_extent_item(root, path, 0); + if (ret < 0) + return ret; + /* no previous, check next extent */ + if (ret > 0) + goto next; + __get_extent_size(root, path, &cur_start, &cur_len); + /* Tail overlap */ + if (cur_start + cur_len > bytenr) + return 1; + +next: + ret = btrfs_next_extent_item(root, path, bytenr + len); + if (ret < 0) + return ret; + /* No next, prev already checked, no overlap */ + if (ret > 0) + return 0; + __get_extent_size(root, path, &cur_start, &cur_len); + /* head overlap*/ + if (cur_start < bytenr + len) + return 1; + return 0; } -int btrfs_read_block_groups(struct btrfs_root *root) +static int __btrfs_record_file_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 objectid, + struct btrfs_inode_item *inode, + u64 file_pos, u64 disk_bytenr, + u64 *ret_num_bytes) { - struct btrfs_path *path; int ret; - int bit; - struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; - struct btrfs_space_info *space_info; - struct extent_io_tree *block_group_cache; - struct btrfs_key key; - struct btrfs_key found_key; + struct btrfs_root *extent_root = info->extent_root; struct extent_buffer *leaf; + struct btrfs_file_extent_item *fi; + struct btrfs_key ins_key; + struct btrfs_path *path; + struct btrfs_extent_item *ei; + u64 nbytes; + u64 extent_num_bytes; + u64 extent_bytenr; + u64 extent_offset; + u64 num_bytes = *ret_num_bytes; - block_group_cache = &info->block_group_cache; + /* + * All supported file system should not use its 0 extent. + * As it's for hole + * + * And hole extent has no size limit, no need to loop. + */ + if (disk_bytenr == 0) { + ret = btrfs_insert_file_extent(trans, root, objectid, + file_pos, disk_bytenr, + num_bytes, num_bytes); + return ret; + } + num_bytes = min_t(u64, num_bytes, BTRFS_MAX_EXTENT_SIZE); - root = info->extent_root; - key.objectid = 0; - key.offset = 0; - btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); path = btrfs_alloc_path(); if (!path) return -ENOMEM; - while(1) { - ret = find_first_block_group(root, path, &key); - if (ret > 0) { - ret = 0; - goto error; - } - if (ret != 0) { - goto error; - } - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - cache = kmalloc(sizeof(*cache), GFP_NOFS); - if (!cache) { - ret = -ENOMEM; - break; - } + /* First to check extent overlap */ + ret = btrfs_search_overlap_extent(extent_root, path, disk_bytenr, + num_bytes); + if (ret < 0) + goto fail; + if (ret > 0) { + /* Found overlap */ + u64 cur_start; + u64 cur_len; - read_extent_buffer(leaf, &cache->item, - btrfs_item_ptr_offset(leaf, path->slots[0]), - sizeof(cache->item)); - memcpy(&cache->key, &found_key, sizeof(found_key)); - cache->cached = 0; - cache->pinned = 0; - key.objectid = found_key.objectid + found_key.offset; - btrfs_release_path(root, path); - cache->flags = btrfs_block_group_flags(&cache->item); - bit = 0; - if (cache->flags & BTRFS_BLOCK_GROUP_DATA) { - bit = BLOCK_GROUP_DATA; - } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) { - bit = BLOCK_GROUP_SYSTEM; - } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { - bit = BLOCK_GROUP_METADATA; - } - set_avail_alloc_bits(info, cache->flags); + __get_extent_size(extent_root, path, &cur_start, &cur_len); + /* + * For convert case, this extent should be a subset of + * existing one. + */ + BUG_ON(disk_bytenr < cur_start); - ret = update_space_info(info, cache->flags, found_key.offset, - btrfs_block_group_used(&cache->item), - &space_info); - BUG_ON(ret); - cache->space_info = space_info; + extent_bytenr = cur_start; + extent_num_bytes = cur_len; + extent_offset = disk_bytenr - extent_bytenr; + } else { + /* No overlap, create new extent */ + btrfs_release_path(path); + ins_key.objectid = disk_bytenr; + ins_key.offset = num_bytes; + ins_key.type = BTRFS_EXTENT_ITEM_KEY; + + ret = btrfs_insert_empty_item(trans, extent_root, path, + &ins_key, sizeof(*ei)); + if (ret == 0) { + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_item); + + btrfs_set_extent_refs(leaf, ei, 0); + btrfs_set_extent_generation(leaf, ei, 0); + btrfs_set_extent_flags(leaf, ei, + BTRFS_EXTENT_FLAG_DATA); + btrfs_mark_buffer_dirty(leaf); + + ret = btrfs_update_block_group(trans, root, disk_bytenr, + num_bytes, 1, 0); + if (ret) + goto fail; + } else if (ret != -EEXIST) { + goto fail; + } + btrfs_extent_post_op(trans, extent_root); + extent_bytenr = disk_bytenr; + extent_num_bytes = num_bytes; + extent_offset = 0; + } + btrfs_release_path(path); + ins_key.objectid = objectid; + ins_key.offset = file_pos; + ins_key.type = BTRFS_EXTENT_DATA_KEY; + ret = btrfs_insert_empty_item(trans, root, path, &ins_key, + sizeof(*fi)); + if (ret) + goto fail; + leaf = path->nodes[0]; + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(leaf, fi, trans->transid); + btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG); + btrfs_set_file_extent_disk_bytenr(leaf, fi, extent_bytenr); + btrfs_set_file_extent_disk_num_bytes(leaf, fi, extent_num_bytes); + btrfs_set_file_extent_offset(leaf, fi, extent_offset); + btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); + btrfs_set_file_extent_ram_bytes(leaf, fi, extent_num_bytes); + btrfs_set_file_extent_compression(leaf, fi, 0); + btrfs_set_file_extent_encryption(leaf, fi, 0); + btrfs_set_file_extent_other_encoding(leaf, fi, 0); + btrfs_mark_buffer_dirty(leaf); - /* use EXTENT_LOCKED to prevent merging */ - set_extent_bits(block_group_cache, found_key.objectid, - found_key.objectid + found_key.offset - 1, - bit | EXTENT_LOCKED, GFP_NOFS); - set_state_private(block_group_cache, found_key.objectid, - (unsigned long)cache); + nbytes = btrfs_stack_inode_nbytes(inode) + num_bytes; + btrfs_set_stack_inode_nbytes(inode, nbytes); + btrfs_release_path(path); - if (key.objectid >= - btrfs_super_total_bytes(&info->super_copy)) - break; - } + ret = btrfs_inc_extent_ref(trans, root, extent_bytenr, extent_num_bytes, + 0, root->root_key.objectid, objectid, + file_pos - extent_offset); + if (ret) + goto fail; ret = 0; -error: + *ret_num_bytes = min(extent_num_bytes - extent_offset, num_bytes); +fail: btrfs_free_path(path); return ret; } -int btrfs_make_block_group(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytes_used, - u64 type, u64 chunk_tree, u64 chunk_objectid, - u64 size) +/* + * Record a file extent. Do all the required works, such as inserting + * file extent item, inserting extent item and backref item into extent + * tree and updating block accounting. + */ +int btrfs_record_file_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 objectid, + struct btrfs_inode_item *inode, + u64 file_pos, u64 disk_bytenr, + u64 num_bytes) { - int ret; - int bit = 0; - struct btrfs_root *extent_root; - struct btrfs_block_group_cache *cache; - struct extent_io_tree *block_group_cache; + u64 cur_disk_bytenr = disk_bytenr; + u64 cur_file_pos = file_pos; + u64 cur_num_bytes = num_bytes; + int ret = 0; - extent_root = root->fs_info->extent_root; - block_group_cache = &root->fs_info->block_group_cache; + while (num_bytes > 0) { + ret = __btrfs_record_file_extent(trans, root, objectid, + inode, cur_file_pos, + cur_disk_bytenr, + &cur_num_bytes); + if (ret < 0) + break; + cur_disk_bytenr += cur_num_bytes; + cur_file_pos += cur_num_bytes; + num_bytes -= cur_num_bytes; + } + return ret; +} - cache = kzalloc(sizeof(*cache), GFP_NOFS); - BUG_ON(!cache); - cache->key.objectid = chunk_objectid; - cache->key.offset = size; - btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); - btrfs_set_block_group_used(&cache->item, bytes_used); - btrfs_set_block_group_chunk_tree(&cache->item, chunk_tree); - btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); - cache->flags = type; - btrfs_set_block_group_flags(&cache->item, type); - ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, - &cache->space_info); - BUG_ON(ret); +static int add_excluded_extent(struct btrfs_root *root, + u64 start, u64 num_bytes) +{ + u64 end = start + num_bytes - 1; + set_extent_bits(&root->fs_info->pinned_extents, + start, end, EXTENT_UPTODATE, GFP_NOFS); + return 0; +} - bit = block_group_state_bits(type); - set_extent_bits(block_group_cache, chunk_objectid, - chunk_objectid + size - 1, - bit | EXTENT_LOCKED, GFP_NOFS); - set_state_private(block_group_cache, chunk_objectid, - (unsigned long)cache); +void free_excluded_extents(struct btrfs_root *root, + struct btrfs_block_group_cache *cache) +{ + u64 start, end; - ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, - sizeof(cache->item)); - BUG_ON(ret); + start = cache->key.objectid; + end = start + cache->key.offset - 1; - finish_current_insert(trans, extent_root); - ret = del_pending_extents(trans, extent_root); - BUG_ON(ret); - set_avail_alloc_bits(extent_root->fs_info, type); - return 0; + clear_extent_bits(&root->fs_info->pinned_extents, + start, end, EXTENT_UPTODATE, GFP_NOFS); } -u64 btrfs_hash_extent_ref(u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset) +int exclude_super_stripes(struct btrfs_root *root, + struct btrfs_block_group_cache *cache) { - return hash_extent_ref(root_objectid, ref_generation, - owner, owner_offset); + u64 bytenr; + u64 *logical; + int stripe_len; + int i, nr, ret; + + if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) { + stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid; + cache->bytes_super += stripe_len; + ret = add_excluded_extent(root, cache->key.objectid, + stripe_len); + if (ret) + return ret; + } + + for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { + bytenr = btrfs_sb_offset(i); + ret = btrfs_rmap_block(&root->fs_info->mapping_tree, + cache->key.objectid, bytenr, + 0, &logical, &nr, &stripe_len); + if (ret) + return ret; + + while (nr--) { + u64 start, len; + + if (logical[nr] > cache->key.objectid + + cache->key.offset) + continue; + + if (logical[nr] + stripe_len <= cache->key.objectid) + continue; + + start = logical[nr]; + if (start < cache->key.objectid) { + start = cache->key.objectid; + len = (logical[nr] + stripe_len) - start; + } else { + len = min_t(u64, stripe_len, + cache->key.objectid + + cache->key.offset - start); + } + + cache->bytes_super += len; + ret = add_excluded_extent(root, start, len); + if (ret) { + kfree(logical); + return ret; + } + } + + kfree(logical); + } + return 0; } -int btrfs_update_block_group(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 bytenr, u64 num_bytes, int alloc, - int mark_free) +u64 add_new_free_space(struct btrfs_block_group_cache *block_group, + struct btrfs_fs_info *info, u64 start, u64 end) { - return update_block_group(trans, root, bytenr, num_bytes, - alloc, mark_free); + u64 extent_start, extent_end, size, total_added = 0; + int ret; + + while (start < end) { + ret = find_first_extent_bit(&info->pinned_extents, start, + &extent_start, &extent_end, + EXTENT_DIRTY | EXTENT_UPTODATE); + if (ret) + break; + + if (extent_start <= start) { + start = extent_end + 1; + } else if (extent_start > start && extent_start < end) { + size = extent_start - start; + total_added += size; + ret = btrfs_add_free_space(block_group->free_space_ctl, + start, size); + BUG_ON(ret); /* -ENOMEM or logic error */ + start = extent_end + 1; + } else { + break; + } + } + + if (start < end) { + size = end - start; + total_added += size; + ret = btrfs_add_free_space(block_group->free_space_ctl, start, + size); + BUG_ON(ret); /* -ENOMEM or logic error */ + } + + return total_added; }