X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=ctree.c;h=4fc33b14000ad0dae26f38f9d31dfedd226e05d1;hb=88ef0b8397fa13f6114dc3ba5c8e88a7b7c992f9;hp=925cafaad247cb39b842ea71ce7b487d895a44bc;hpb=428b7fa6302c97f5a79f4864a448f481b08a0dfe;p=platform%2Fupstream%2Fbtrfs-progs.git diff --git a/ctree.c b/ctree.c index 925cafa..4fc33b1 100644 --- a/ctree.c +++ b/ctree.c @@ -19,6 +19,9 @@ #include "disk-io.h" #include "transaction.h" #include "print-tree.h" +#include "repair.h" +#include "internal.h" +#include "sizes.h" static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level); @@ -32,8 +35,6 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *dst_buf, struct extent_buffer *src_buf); -static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_path *path, int level, int slot); inline void btrfs_init_path(struct btrfs_path *p) { @@ -43,32 +44,30 @@ inline void btrfs_init_path(struct btrfs_path *p) struct btrfs_path *btrfs_alloc_path(void) { struct btrfs_path *path; - path = kmalloc(sizeof(struct btrfs_path), GFP_NOFS); - if (path) { - btrfs_init_path(path); - path->reada = 0; - } + path = kzalloc(sizeof(struct btrfs_path), GFP_NOFS); return path; } void btrfs_free_path(struct btrfs_path *p) { - btrfs_release_path(NULL, p); + if (!p) + return; + btrfs_release_path(p); kfree(p); } -void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) +void btrfs_release_path(struct btrfs_path *p) { int i; for (i = 0; i < BTRFS_MAX_LEVEL; i++) { if (!p->nodes[i]) - break; + continue; free_extent_buffer(p->nodes[i]); } memset(p, 0, sizeof(*p)); } -static void add_root_to_dirty_list(struct btrfs_root *root) +void add_root_to_dirty_list(struct btrfs_root *root) { if (root->track_dirty && list_empty(&root->dirty_list)) { list_add(&root->dirty_list, @@ -85,6 +84,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, int ret = 0; int level; struct btrfs_root *new_root; + struct btrfs_disk_key disk_key; new_root = kmalloc(sizeof(*new_root), GFP_NOFS); if (!new_root) @@ -98,8 +98,12 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, WARN_ON(root->ref_cows && trans->transid != root->last_trans); level = btrfs_header_level(buf); - cow = btrfs_alloc_free_block(trans, new_root, buf->len, 0, - new_root_objectid, trans->transid, + if (level == 0) + btrfs_item_key(buf, &disk_key, 0); + else + btrfs_node_key(buf, &disk_key, 0); + cow = btrfs_alloc_free_block(trans, new_root, buf->len, + new_root_objectid, &disk_key, level, buf->start, 0); if (IS_ERR(cow)) { kfree(new_root); @@ -109,11 +113,19 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, copy_extent_buffer(cow, buf, 0, 0, cow->len); btrfs_set_header_bytenr(cow, cow->start); btrfs_set_header_generation(cow, trans->transid); - btrfs_set_header_owner(cow, new_root_objectid); - btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN); + btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV); + btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN | + BTRFS_HEADER_FLAG_RELOC); + if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) + btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC); + else + btrfs_set_header_owner(cow, new_root_objectid); + + write_extent_buffer(cow, root->fs_info->fsid, + btrfs_header_fsid(), BTRFS_FSID_SIZE); WARN_ON(btrfs_header_generation(buf) > trans->transid); - ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL); + ret = btrfs_inc_ref(trans, new_root, cow, 0); kfree(new_root); if (ret) @@ -124,6 +136,125 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, return 0; } +/* + * check if the tree block can be shared by multiple trees + */ +static int btrfs_block_can_be_shared(struct btrfs_root *root, + struct extent_buffer *buf) +{ + /* + * Tree blocks not in reference counted trees and tree roots + * are never shared. If a block was allocated after the last + * snapshot and the block was not allocated by tree relocation, + * we know the block is not shared. + */ + if (root->ref_cows && + buf != root->node && buf != root->commit_root && + (btrfs_header_generation(buf) <= + btrfs_root_last_snapshot(&root->root_item) || + btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) + return 1; +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + if (root->ref_cows && + btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) + return 1; +#endif + return 0; +} + +static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, + struct extent_buffer *cow) +{ + u64 refs; + u64 owner; + u64 flags; + u64 new_flags = 0; + int ret; + + /* + * Backrefs update rules: + * + * Always use full backrefs for extent pointers in tree block + * allocated by tree relocation. + * + * If a shared tree block is no longer referenced by its owner + * tree (btrfs_header_owner(buf) == root->root_key.objectid), + * use full backrefs for extent pointers in tree block. + * + * If a tree block is been relocating + * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID), + * use full backrefs for extent pointers in tree block. + * The reason for this is some operations (such as drop tree) + * are only allowed for blocks use full backrefs. + */ + + if (btrfs_block_can_be_shared(root, buf)) { + ret = btrfs_lookup_extent_info(trans, root, buf->start, + btrfs_header_level(buf), 1, + &refs, &flags); + BUG_ON(ret); + BUG_ON(refs == 0); + } else { + refs = 1; + if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || + btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) + flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; + else + flags = 0; + } + + owner = btrfs_header_owner(buf); + BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) && + owner == BTRFS_TREE_RELOC_OBJECTID); + + if (refs > 1) { + if ((owner == root->root_key.objectid || + root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && + !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) { + ret = btrfs_inc_ref(trans, root, buf, 1); + BUG_ON(ret); + + if (root->root_key.objectid == + BTRFS_TREE_RELOC_OBJECTID) { + ret = btrfs_dec_ref(trans, root, buf, 0); + BUG_ON(ret); + ret = btrfs_inc_ref(trans, root, cow, 1); + BUG_ON(ret); + } + new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + } else { + + if (root->root_key.objectid == + BTRFS_TREE_RELOC_OBJECTID) + ret = btrfs_inc_ref(trans, root, cow, 1); + else + ret = btrfs_inc_ref(trans, root, cow, 0); + BUG_ON(ret); + } + if (new_flags != 0) { + ret = btrfs_set_block_flags(trans, root, buf->start, + btrfs_header_level(buf), + new_flags); + BUG_ON(ret); + } + } else { + if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { + if (root->root_key.objectid == + BTRFS_TREE_RELOC_OBJECTID) + ret = btrfs_inc_ref(trans, root, cow, 1); + else + ret = btrfs_inc_ref(trans, root, cow, 0); + BUG_ON(ret); + ret = btrfs_dec_ref(trans, root, buf, 1); + BUG_ON(ret); + } + clean_tree_block(trans, root, buf); + } + return 0; +} + int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, @@ -131,26 +262,23 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct extent_buffer **cow_ret, u64 search_start, u64 empty_size) { - u64 parent_start; struct extent_buffer *cow; - u32 nritems; - int ret = 0; - int different_trans = 0; + struct btrfs_disk_key disk_key; int level; WARN_ON(root->ref_cows && trans->transid != root->fs_info->running_transaction->transid); WARN_ON(root->ref_cows && trans->transid != root->last_trans); - if (parent) - parent_start = parent->start; + level = btrfs_header_level(buf); + + if (level == 0) + btrfs_item_key(buf, &disk_key, 0); else - parent_start = 0; + btrfs_node_key(buf, &disk_key, 0); - level = btrfs_header_level(buf); - nritems = btrfs_header_nritems(buf); - cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start, - root->root_key.objectid, trans->transid, + cow = btrfs_alloc_free_block(trans, root, buf->len, + root->root_key.objectid, &disk_key, level, search_start, empty_size); if (IS_ERR(cow)) return PTR_ERR(cow); @@ -158,32 +286,28 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, copy_extent_buffer(cow, buf, 0, 0, cow->len); btrfs_set_header_bytenr(cow, cow->start); btrfs_set_header_generation(cow, trans->transid); - btrfs_set_header_owner(cow, root->root_key.objectid); - btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN); + btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV); + btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN | + BTRFS_HEADER_FLAG_RELOC); + if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) + btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC); + else + btrfs_set_header_owner(cow, root->root_key.objectid); - WARN_ON(btrfs_header_generation(buf) > trans->transid); - if (btrfs_header_generation(buf) != trans->transid) { - different_trans = 1; - ret = btrfs_inc_ref(trans, root, buf, cow, NULL); - if (ret) - return ret; - } else { - ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems); - if (ret) - return ret; - clean_tree_block(trans, root, buf); - } + write_extent_buffer(cow, root->fs_info->fsid, + btrfs_header_fsid(), BTRFS_FSID_SIZE); + + WARN_ON(!(buf->flags & EXTENT_BAD_TRANSID) && + btrfs_header_generation(buf) > trans->transid); + + update_ref_for_cow(trans, root, buf, cow); if (buf == root->node) { root->node = cow; extent_buffer_get(cow); - if (buf != root->commit_root) { - btrfs_free_extent(trans, root, buf->start, - buf->len, buf->start, - root->root_key.objectid, - btrfs_header_generation(buf), - 0, 0, 1); - } + + btrfs_free_extent(trans, root, buf->start, buf->len, + 0, root->root_key.objectid, level, 0); free_extent_buffer(buf); add_root_to_dirty_list(root); } else { @@ -194,9 +318,13 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, trans->transid); btrfs_mark_buffer_dirty(parent); WARN_ON(btrfs_header_generation(parent) != trans->transid); + btrfs_free_extent(trans, root, buf->start, buf->len, - parent_start, btrfs_header_owner(parent), - btrfs_header_generation(parent), 0, 0, 1); + 0, root->root_key.objectid, level, 1); + } + if (!list_empty(&buf->recow)) { + list_del_init(&buf->recow); + free_extent_buffer(buf); } free_extent_buffer(buf); btrfs_mark_buffer_dirty(cow); @@ -204,6 +332,18 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, return 0; } +static inline int should_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf) +{ + if (btrfs_header_generation(buf) == trans->transid && + !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) && + !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID && + btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) + return 0; + return 1; +} + int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, @@ -224,181 +364,44 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, (unsigned long long)root->fs_info->generation); WARN_ON(1); } - if (btrfs_header_generation(buf) == trans->transid && - !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { + if (!should_cow_block(trans, root, buf)) { *cow_ret = buf; return 0; } - search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); + search_start = buf->start & ~((u64)SZ_1G - 1); ret = __btrfs_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0); return ret; } -/* -static int close_blocks(u64 blocknr, u64 other, u32 blocksize) +int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2) { - if (blocknr < other && other - (blocknr + blocksize) < 32768) + if (k1->objectid > k2->objectid) return 1; - if (blocknr > other && blocknr - (other + blocksize) < 32768) + if (k1->objectid < k2->objectid) + return -1; + if (k1->type > k2->type) + return 1; + if (k1->type < k2->type) + return -1; + if (k1->offset > k2->offset) return 1; + if (k1->offset < k2->offset) + return -1; return 0; } -*/ /* * compare two keys in a memcmp fashion */ -int btrfs_comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) +static int btrfs_comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) { struct btrfs_key k1; btrfs_disk_key_to_cpu(&k1, disk); - - if (k1.objectid > k2->objectid) - return 1; - if (k1.objectid < k2->objectid) - return -1; - if (k1.type > k2->type) - return 1; - if (k1.type < k2->type) - return -1; - if (k1.offset > k2->offset) - return 1; - if (k1.offset < k2->offset) - return -1; - return 0; -} - - -#if 0 -int btrfs_realloc_node(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct extent_buffer *parent, - int start_slot, int cache_only, u64 *last_ret, - struct btrfs_key *progress) -{ - struct extent_buffer *cur; - struct extent_buffer *tmp; - u64 blocknr; - u64 gen; - u64 search_start = *last_ret; - u64 last_block = 0; - u64 other; - u32 parent_nritems; - int end_slot; - int i; - int err = 0; - int parent_level; - int uptodate; - u32 blocksize; - int progress_passed = 0; - struct btrfs_disk_key disk_key; - - parent_level = btrfs_header_level(parent); - if (cache_only && parent_level != 1) - return 0; - - if (trans->transaction != root->fs_info->running_transaction) { - printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, - root->fs_info->running_transaction->transid); - WARN_ON(1); - } - if (trans->transid != root->fs_info->generation) { - printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, - root->fs_info->generation); - WARN_ON(1); - } - - parent_nritems = btrfs_header_nritems(parent); - blocksize = btrfs_level_size(root, parent_level - 1); - end_slot = parent_nritems; - - if (parent_nritems == 1) - return 0; - - for (i = start_slot; i < end_slot; i++) { - int close = 1; - - if (!parent->map_token) { - map_extent_buffer(parent, - btrfs_node_key_ptr_offset(i), - sizeof(struct btrfs_key_ptr), - &parent->map_token, &parent->kaddr, - &parent->map_start, &parent->map_len, - KM_USER1); - } - btrfs_node_key(parent, &disk_key, i); - if (!progress_passed && comp_keys(&disk_key, progress) < 0) - continue; - - progress_passed = 1; - blocknr = btrfs_node_blockptr(parent, i); - gen = btrfs_node_ptr_generation(parent, i); - if (last_block == 0) - last_block = blocknr; - - if (i > 0) { - other = btrfs_node_blockptr(parent, i - 1); - close = close_blocks(blocknr, other, blocksize); - } - if (close && i < end_slot - 2) { - other = btrfs_node_blockptr(parent, i + 1); - close = close_blocks(blocknr, other, blocksize); - } - if (close) { - last_block = blocknr; - continue; - } - if (parent->map_token) { - unmap_extent_buffer(parent, parent->map_token, - KM_USER1); - parent->map_token = NULL; - } - - cur = btrfs_find_tree_block(root, blocknr, blocksize); - if (cur) - uptodate = btrfs_buffer_uptodate(cur, gen); - else - uptodate = 0; - if (!cur || !uptodate) { - if (cache_only) { - free_extent_buffer(cur); - continue; - } - if (!cur) { - cur = read_tree_block(root, blocknr, - blocksize, gen); - } else if (!uptodate) { - btrfs_read_buffer(cur, gen); - } - } - if (search_start == 0) - search_start = last_block; - - err = __btrfs_cow_block(trans, root, cur, parent, i, - &tmp, search_start, - min(16 * blocksize, - (end_slot - i) * blocksize)); - if (err) { - free_extent_buffer(cur); - break; - } - search_start = tmp->start; - last_block = tmp->start; - *last_ret = search_start; - if (parent_level == 1) - btrfs_clear_buffer_defrag(tmp); - free_extent_buffer(tmp); - } - if (parent->map_token) { - unmap_extent_buffer(parent, parent->map_token, - KM_USER1); - parent->map_token = NULL; - } - return err; + return btrfs_comp_cpu_keys(&k1, k2); } -#endif /* * The leaf data grows from end-to-front in the node. @@ -414,156 +417,162 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root, return btrfs_item_offset_nr(leaf, nr - 1); } -static int check_node(struct btrfs_root *root, struct btrfs_path *path, - int level) +enum btrfs_tree_block_status +btrfs_check_node(struct btrfs_root *root, struct btrfs_disk_key *parent_key, + struct extent_buffer *buf) { - struct extent_buffer *parent = NULL; - struct extent_buffer *node = path->nodes[level]; - struct btrfs_disk_key parent_key; - struct btrfs_disk_key node_key; - int parent_slot; - int slot; + int i; struct btrfs_key cpukey; - u32 nritems = btrfs_header_nritems(node); + struct btrfs_disk_key key; + u32 nritems = btrfs_header_nritems(buf); + enum btrfs_tree_block_status ret = BTRFS_TREE_BLOCK_INVALID_NRITEMS; - if (path->nodes[level + 1]) - parent = path->nodes[level + 1]; + if (nritems == 0 || nritems > BTRFS_NODEPTRS_PER_BLOCK(root)) + goto fail; - slot = path->slots[level]; - BUG_ON(nritems == 0); - if (parent) { - parent_slot = path->slots[level + 1]; - btrfs_node_key(parent, &parent_key, parent_slot); - btrfs_node_key(node, &node_key, 0); - BUG_ON(memcmp(&parent_key, &node_key, - sizeof(struct btrfs_disk_key))); - BUG_ON(btrfs_node_blockptr(parent, parent_slot) != - btrfs_header_bytenr(node)); - } - BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root)); - if (slot != 0) { - btrfs_node_key_to_cpu(node, &cpukey, slot - 1); - btrfs_node_key(node, &node_key, slot); - BUG_ON(btrfs_comp_keys(&node_key, &cpukey) <= 0); + ret = BTRFS_TREE_BLOCK_INVALID_PARENT_KEY; + if (parent_key && parent_key->type) { + btrfs_node_key(buf, &key, 0); + if (memcmp(parent_key, &key, sizeof(key))) + goto fail; } - if (slot < nritems - 1) { - btrfs_node_key_to_cpu(node, &cpukey, slot + 1); - btrfs_node_key(node, &node_key, slot); - BUG_ON(btrfs_comp_keys(&node_key, &cpukey) >= 0); + ret = BTRFS_TREE_BLOCK_BAD_KEY_ORDER; + for (i = 0; nritems > 1 && i < nritems - 2; i++) { + btrfs_node_key(buf, &key, i); + btrfs_node_key_to_cpu(buf, &cpukey, i + 1); + if (btrfs_comp_keys(&key, &cpukey) >= 0) + goto fail; + } + return BTRFS_TREE_BLOCK_CLEAN; +fail: + if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) { + if (parent_key) + btrfs_disk_key_to_cpu(&cpukey, parent_key); + else + btrfs_node_key_to_cpu(buf, &cpukey, 0); + btrfs_add_corrupt_extent_record(root->fs_info, &cpukey, + buf->start, buf->len, + btrfs_header_level(buf)); } - return 0; + return ret; } -static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, - int level) +enum btrfs_tree_block_status +btrfs_check_leaf(struct btrfs_root *root, struct btrfs_disk_key *parent_key, + struct extent_buffer *buf) { - struct extent_buffer *leaf = path->nodes[level]; - struct extent_buffer *parent = NULL; - int parent_slot; + int i; struct btrfs_key cpukey; - struct btrfs_disk_key parent_key; - struct btrfs_disk_key leaf_key; - int slot = path->slots[0]; + struct btrfs_disk_key key; + u32 nritems = btrfs_header_nritems(buf); + enum btrfs_tree_block_status ret = BTRFS_TREE_BLOCK_INVALID_NRITEMS; - u32 nritems = btrfs_header_nritems(leaf); + if (nritems * sizeof(struct btrfs_item) > buf->len) { + fprintf(stderr, "invalid number of items %llu\n", + (unsigned long long)buf->start); + goto fail; + } - if (path->nodes[level + 1]) - parent = path->nodes[level + 1]; + if (btrfs_header_level(buf) != 0) { + ret = BTRFS_TREE_BLOCK_INVALID_LEVEL; + fprintf(stderr, "leaf is not a leaf %llu\n", + (unsigned long long)btrfs_header_bytenr(buf)); + goto fail; + } + if (btrfs_leaf_free_space(root, buf) < 0) { + ret = BTRFS_TREE_BLOCK_INVALID_FREE_SPACE; + fprintf(stderr, "leaf free space incorrect %llu %d\n", + (unsigned long long)btrfs_header_bytenr(buf), + btrfs_leaf_free_space(root, buf)); + goto fail; + } if (nritems == 0) - return 0; - - if (parent) { - parent_slot = path->slots[level + 1]; - btrfs_node_key(parent, &parent_key, parent_slot); - btrfs_item_key(leaf, &leaf_key, 0); - - BUG_ON(memcmp(&parent_key, &leaf_key, - sizeof(struct btrfs_disk_key))); - BUG_ON(btrfs_node_blockptr(parent, parent_slot) != - btrfs_header_bytenr(leaf)); - } -#if 0 - for (i = 0; nritems > 1 && i < nritems - 2; i++) { - btrfs_item_key_to_cpu(leaf, &cpukey, i + 1); - btrfs_item_key(leaf, &leaf_key, i); - if (comp_keys(&leaf_key, &cpukey) >= 0) { - btrfs_print_leaf(root, leaf); - printk("slot %d offset bad key\n", i); - BUG_ON(1); - } - if (btrfs_item_offset_nr(leaf, i) != - btrfs_item_end_nr(leaf, i + 1)) { - btrfs_print_leaf(root, leaf); - printk("slot %d offset bad\n", i); - BUG_ON(1); + return BTRFS_TREE_BLOCK_CLEAN; + + btrfs_item_key(buf, &key, 0); + if (parent_key && parent_key->type && + memcmp(parent_key, &key, sizeof(key))) { + ret = BTRFS_TREE_BLOCK_INVALID_PARENT_KEY; + fprintf(stderr, "leaf parent key incorrect %llu\n", + (unsigned long long)btrfs_header_bytenr(buf)); + goto fail; + } + for (i = 0; nritems > 1 && i < nritems - 1; i++) { + btrfs_item_key(buf, &key, i); + btrfs_item_key_to_cpu(buf, &cpukey, i + 1); + if (btrfs_comp_keys(&key, &cpukey) >= 0) { + ret = BTRFS_TREE_BLOCK_BAD_KEY_ORDER; + fprintf(stderr, "bad key ordering %d %d\n", i, i+1); + goto fail; } - if (i == 0) { - if (btrfs_item_offset_nr(leaf, i) + - btrfs_item_size_nr(leaf, i) != - BTRFS_LEAF_DATA_SIZE(root)) { - btrfs_print_leaf(root, leaf); - printk("slot %d first offset bad\n", i); - BUG_ON(1); - } + if (btrfs_item_offset_nr(buf, i) != + btrfs_item_end_nr(buf, i + 1)) { + ret = BTRFS_TREE_BLOCK_INVALID_OFFSETS; + fprintf(stderr, "incorrect offsets %u %u\n", + btrfs_item_offset_nr(buf, i), + btrfs_item_end_nr(buf, i + 1)); + goto fail; } - } - if (nritems > 0) { - if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) { - btrfs_print_leaf(root, leaf); - printk("slot %d bad size \n", nritems - 1); - BUG_ON(1); + if (i == 0 && btrfs_item_end_nr(buf, i) != + BTRFS_LEAF_DATA_SIZE(root)) { + ret = BTRFS_TREE_BLOCK_INVALID_OFFSETS; + fprintf(stderr, "bad item end %u wanted %u\n", + btrfs_item_end_nr(buf, i), + (unsigned)BTRFS_LEAF_DATA_SIZE(root)); + goto fail; } } -#endif - if (slot != 0 && slot < nritems - 1) { - btrfs_item_key(leaf, &leaf_key, slot); - btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1); - if (btrfs_comp_keys(&leaf_key, &cpukey) <= 0) { - btrfs_print_leaf(root, leaf); - printk("slot %d offset bad key\n", slot); - BUG_ON(1); - } - if (btrfs_item_offset_nr(leaf, slot - 1) != - btrfs_item_end_nr(leaf, slot)) { - btrfs_print_leaf(root, leaf); - printk("slot %d offset bad\n", slot); - BUG_ON(1); + + for (i = 0; i < nritems; i++) { + if (btrfs_item_end_nr(buf, i) > BTRFS_LEAF_DATA_SIZE(root)) { + btrfs_item_key(buf, &key, 0); + btrfs_print_key(&key); + fflush(stdout); + ret = BTRFS_TREE_BLOCK_INVALID_OFFSETS; + fprintf(stderr, "slot end outside of leaf %llu > %llu\n", + (unsigned long long)btrfs_item_end_nr(buf, i), + (unsigned long long)BTRFS_LEAF_DATA_SIZE(root)); + goto fail; } } - if (slot < nritems - 1) { - btrfs_item_key(leaf, &leaf_key, slot); - btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1); - BUG_ON(btrfs_comp_keys(&leaf_key, &cpukey) >= 0); - if (btrfs_item_offset_nr(leaf, slot) != - btrfs_item_end_nr(leaf, slot + 1)) { - btrfs_print_leaf(root, leaf); - printk("slot %d offset bad\n", slot); - BUG_ON(1); - } + + return BTRFS_TREE_BLOCK_CLEAN; +fail: + if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) { + if (parent_key) + btrfs_disk_key_to_cpu(&cpukey, parent_key); + else + btrfs_item_key_to_cpu(buf, &cpukey, 0); + + btrfs_add_corrupt_extent_record(root->fs_info, &cpukey, + buf->start, buf->len, 0); } - BUG_ON(btrfs_item_offset_nr(leaf, 0) + - btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root)); - return 0; + return ret; } static int noinline check_block(struct btrfs_root *root, struct btrfs_path *path, int level) { - return 0; -#if 0 - struct extent_buffer *buf = path->nodes[level]; + struct btrfs_disk_key key; + struct btrfs_disk_key *key_ptr = NULL; + struct extent_buffer *parent; + enum btrfs_tree_block_status ret; - if (memcmp_extent_buffer(buf, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(buf), - BTRFS_FSID_SIZE)) { - printk("warning bad block %Lu\n", buf->start); - return 1; + if (path->skip_check_block) + return 0; + if (path->nodes[level + 1]) { + parent = path->nodes[level + 1]; + btrfs_node_key(parent, &key, path->slots[level + 1]); + key_ptr = &key; } -#endif if (level == 0) - return check_leaf(root, path, level); - return check_node(root, path, level); + ret = btrfs_check_leaf(root, key_ptr, path->nodes[0]); + else + ret = btrfs_check_node(root, key_ptr, path->nodes[level]); + if (ret == BTRFS_TREE_BLOCK_CLEAN) + return 0; + return -EIO; } /* @@ -614,23 +623,21 @@ static int generic_bin_search(struct extent_buffer *eb, unsigned long p, static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, int level, int *slot) { - if (level == 0) { + if (level == 0) return generic_bin_search(eb, offsetof(struct btrfs_leaf, items), sizeof(struct btrfs_item), key, btrfs_header_nritems(eb), slot); - } else { + else return generic_bin_search(eb, offsetof(struct btrfs_node, ptrs), sizeof(struct btrfs_key_ptr), key, btrfs_header_nritems(eb), slot); - } - return -1; } -static struct extent_buffer *read_node_slot(struct btrfs_root *root, +struct extent_buffer *read_node_slot(struct btrfs_fs_info *fs_info, struct extent_buffer *parent, int slot) { int level = btrfs_header_level(parent); @@ -639,10 +646,10 @@ static struct extent_buffer *read_node_slot(struct btrfs_root *root, if (slot >= btrfs_header_nritems(parent)) return NULL; - BUG_ON(level == 0); + if (level == 0) + return NULL; - return read_tree_block(root, btrfs_node_blockptr(parent, slot), - btrfs_level_size(root, level - 1), + return read_tree_block(fs_info, btrfs_node_blockptr(parent, slot), btrfs_node_ptr_generation(parent, slot)); } @@ -654,11 +661,11 @@ static int balance_level(struct btrfs_trans_handle *trans, struct extent_buffer *mid; struct extent_buffer *left = NULL; struct extent_buffer *parent = NULL; + struct btrfs_fs_info *fs_info = root->fs_info; int ret = 0; int wret; int pslot; int orig_slot = path->slots[level]; - int err_on_enospc = 0; u64 orig_ptr; if (level == 0) @@ -669,9 +676,10 @@ static int balance_level(struct btrfs_trans_handle *trans, orig_ptr = btrfs_node_blockptr(mid, orig_slot); - if (level < BTRFS_MAX_LEVEL - 1) + if (level < BTRFS_MAX_LEVEL - 1) { parent = path->nodes[level + 1]; - pslot = path->slots[level + 1]; + pslot = path->slots[level + 1]; + } /* * deal with the case where there is only one pointer in the root @@ -684,29 +692,21 @@ static int balance_level(struct btrfs_trans_handle *trans, return 0; /* promote the child to a root */ - child = read_node_slot(root, mid, 0); - BUG_ON(!child); + child = read_node_slot(fs_info, mid, 0); + BUG_ON(!extent_buffer_uptodate(child)); ret = btrfs_cow_block(trans, root, child, mid, 0, &child); BUG_ON(ret); root->node = child; - - ret = btrfs_update_extent_ref(trans, root, child->start, - mid->start, child->start, - root->root_key.objectid, - trans->transid, - level - 1, 0); - BUG_ON(ret); - add_root_to_dirty_list(root); path->nodes[level] = NULL; clean_tree_block(trans, root, mid); - wait_on_tree_block_writeback(root, mid); /* once for the path */ free_extent_buffer(mid); + ret = btrfs_free_extent(trans, root, mid->start, mid->len, - mid->start, root->root_key.objectid, - btrfs_header_generation(mid), 0, 0, 1); + 0, root->root_key.objectid, + level, 1); /* once for the root ptr */ free_extent_buffer(mid); return ret; @@ -715,11 +715,8 @@ static int balance_level(struct btrfs_trans_handle *trans, BTRFS_NODEPTRS_PER_BLOCK(root) / 4) return 0; - if (btrfs_header_nritems(mid) < 2) - err_on_enospc = 1; - - left = read_node_slot(root, parent, pslot - 1); - if (left) { + left = read_node_slot(fs_info, parent, pslot - 1); + if (extent_buffer_uptodate(left)) { wret = btrfs_cow_block(trans, root, left, parent, pslot - 1, &left); if (wret) { @@ -727,8 +724,8 @@ static int balance_level(struct btrfs_trans_handle *trans, goto enospc; } } - right = read_node_slot(root, parent, pslot + 1); - if (right) { + right = read_node_slot(fs_info, parent, pslot + 1); + if (extent_buffer_uptodate(right)) { wret = btrfs_cow_block(trans, root, right, parent, pslot + 1, &right); if (wret) { @@ -743,8 +740,6 @@ static int balance_level(struct btrfs_trans_handle *trans, wret = push_node_left(trans, root, left, mid, 1); if (wret < 0) ret = wret; - if (btrfs_header_nritems(mid) < 2) - err_on_enospc = 1; } /* @@ -756,21 +751,18 @@ static int balance_level(struct btrfs_trans_handle *trans, ret = wret; if (btrfs_header_nritems(right) == 0) { u64 bytenr = right->start; - u64 generation = btrfs_header_generation(parent); u32 blocksize = right->len; clean_tree_block(trans, root, right); - wait_on_tree_block_writeback(root, right); free_extent_buffer(right); right = NULL; - wret = del_ptr(trans, root, path, level + 1, pslot + - 1); + wret = btrfs_del_ptr(root, path, level + 1, pslot + 1); if (wret) ret = wret; wret = btrfs_free_extent(trans, root, bytenr, - blocksize, parent->start, - btrfs_header_owner(parent), - generation, 0, 0, 1); + blocksize, 0, + root->root_key.objectid, + level, 0); if (wret) ret = wret; } else { @@ -805,20 +797,17 @@ static int balance_level(struct btrfs_trans_handle *trans, } if (btrfs_header_nritems(mid) == 0) { /* we've managed to empty the middle node, drop it */ - u64 root_gen = btrfs_header_generation(parent); u64 bytenr = mid->start; u32 blocksize = mid->len; clean_tree_block(trans, root, mid); - wait_on_tree_block_writeback(root, mid); free_extent_buffer(mid); mid = NULL; - wret = del_ptr(trans, root, path, level + 1, pslot); + wret = btrfs_del_ptr(root, path, level + 1, pslot); if (wret) ret = wret; wret = btrfs_free_extent(trans, root, bytenr, blocksize, - parent->start, - btrfs_header_owner(parent), - root_gen, 0, 0, 1); + 0, root->root_key.objectid, + level, 0); if (wret) ret = wret; } else { @@ -865,30 +854,30 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, struct extent_buffer *mid; struct extent_buffer *left = NULL; struct extent_buffer *parent = NULL; + struct btrfs_fs_info *fs_info = root->fs_info; int ret = 0; int wret; int pslot; int orig_slot = path->slots[level]; - u64 orig_ptr; if (level == 0) return 1; mid = path->nodes[level]; WARN_ON(btrfs_header_generation(mid) != trans->transid); - orig_ptr = btrfs_node_blockptr(mid, orig_slot); - if (level < BTRFS_MAX_LEVEL - 1) + if (level < BTRFS_MAX_LEVEL - 1) { parent = path->nodes[level + 1]; - pslot = path->slots[level + 1]; + pslot = path->slots[level + 1]; + } if (!parent) return 1; - left = read_node_slot(root, parent, pslot - 1); + left = read_node_slot(fs_info, parent, pslot - 1); /* first, try to make some room in the middle buffer */ - if (left) { + if (extent_buffer_uptodate(left)) { u32 left_nr; left_nr = btrfs_header_nritems(left); if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { @@ -926,12 +915,12 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, } free_extent_buffer(left); } - right= read_node_slot(root, parent, pslot + 1); + right= read_node_slot(fs_info, parent, pslot + 1); /* * then try to empty the right most buffer into the middle */ - if (right) { + if (extent_buffer_uptodate(right)) { u32 right_nr; right_nr = btrfs_header_nritems(right); if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { @@ -975,9 +964,10 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, /* * readahead one full node of leaves */ -static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, +void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, int level, int slot, u64 objectid) { + struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *node; struct btrfs_disk_key disk_key; u32 nritems; @@ -988,7 +978,6 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, int direction = path->reada; struct extent_buffer *eb; u32 nr; - u32 blocksize; u32 nscan = 0; if (level != 1) @@ -999,8 +988,7 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, node = path->nodes[level]; search = btrfs_node_blockptr(node, slot); - blocksize = btrfs_level_size(root, level - 1); - eb = btrfs_find_tree_block(root, search, blocksize); + eb = btrfs_find_tree_block(fs_info, search, fs_info->nodesize); if (eb) { free_extent_buffer(eb); return; @@ -1030,14 +1018,14 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, if ((search >= lowest_read && search <= highest_read) || (search < lowest_read && lowest_read - search <= 32768) || (search > highest_read && search - highest_read <= 32768)) { - readahead_tree_block(root, search, blocksize, + readahead_tree_block(fs_info, search, btrfs_node_ptr_generation(node, nr)); - nread += blocksize; + nread += fs_info->nodesize; } nscan++; - if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32)) + if (path->reada < 2 && (nread > SZ_256K || nscan > 32)) break; - if(nread > (1024 * 1024) || nscan > 128) + if(nread > SZ_1M || nscan > 128) break; if (search < lowest_read) @@ -1047,6 +1035,51 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, } } +int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path, + u64 iobjectid, u64 ioff, u8 key_type, + struct btrfs_key *found_key) +{ + int ret; + struct btrfs_key key; + struct extent_buffer *eb; + struct btrfs_path *path; + + key.type = key_type; + key.objectid = iobjectid; + key.offset = ioff; + + if (found_path == NULL) { + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + } else + path = found_path; + + ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0); + if ((ret < 0) || (found_key == NULL)) + goto out; + + eb = path->nodes[0]; + if (ret && path->slots[0] >= btrfs_header_nritems(eb)) { + ret = btrfs_next_leaf(fs_root, path); + if (ret) + goto out; + eb = path->nodes[0]; + } + + btrfs_item_key_to_cpu(eb, found_key, path->slots[0]); + if (found_key->type != key.type || + found_key->objectid != key.objectid) { + ret = 1; + goto out; + } + +out: + if (path != found_path) + btrfs_free_path(path); + return ret; +} + /* * look for key in the tree. path is filled in with nodes along the way * if key is found, we return zero and you can find the item in the leaf @@ -1069,10 +1102,11 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root int ret; int level; int should_reada = p->reada; + struct btrfs_fs_info *fs_info = root->fs_info; u8 lowest_level = 0; lowest_level = p->lowest_level; - WARN_ON(lowest_level && ins_len); + WARN_ON(lowest_level && ins_len > 0); WARN_ON(p->nodes[0] != NULL); /* WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); @@ -1106,7 +1140,8 @@ again: if (ret && slot > 0) slot -= 1; p->slots[level] = slot; - if (ins_len > 0 && btrfs_header_nritems(b) >= + if ((p->search_for_split || ins_len > 0) && + btrfs_header_nritems(b) >= BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { int sret = split_node(trans, root, p, level); BUG_ON(sret > 0); @@ -1121,7 +1156,7 @@ again: return sret; b = p->nodes[level]; if (!b) { - btrfs_release_path(NULL, p); + btrfs_release_path(p); goto again; } slot = p->slots[level]; @@ -1135,11 +1170,13 @@ again: reada_for_search(root, p, level, slot, key->objectid); - b = read_node_slot(root, b, slot); + b = read_node_slot(fs_info, b, slot); + if (!extent_buffer_uptodate(b)) + return -EIO; } else { p->slots[level] = slot; - if (ins_len > 0 && btrfs_leaf_free_space(root, b) < - sizeof(struct btrfs_item) + ins_len) { + if (ins_len > 0 && + ins_len > btrfs_leaf_free_space(root, b)) { int sret = split_leaf(trans, root, key, p, ins_len, ret == 0); BUG_ON(sret > 0); @@ -1158,16 +1195,11 @@ again: * This is used after shifting pointers to the left, so it stops * fixing up pointers when a given leaf/node is not in slot 0 of the * higher levels - * - * If this fails to write a tree block, it returns -1, but continues - * fixing up the blocks in ram so the tree is consistent. */ -static int fixup_low_keys(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_path *path, +void btrfs_fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_disk_key *key, int level) { int i; - int ret = 0; struct extent_buffer *t; for (i = level; i < BTRFS_MAX_LEVEL; i++) { @@ -1180,7 +1212,6 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, if (tslot != 0) break; } - return ret; } /* @@ -1189,8 +1220,7 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, * This function isn't completely safe. It's the caller's responsibility * that the new key won't break the order */ -int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_path *path, +int btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *new_key) { struct btrfs_disk_key disk_key; @@ -1214,11 +1244,33 @@ int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, btrfs_set_item_key(eb, &disk_key, slot); btrfs_mark_buffer_dirty(eb); if (slot == 0) - fixup_low_keys(trans, root, path, &disk_key, 1); + btrfs_fixup_low_keys(root, path, &disk_key, 1); return 0; } /* + * update an item key without the safety checks. This is meant to be called by + * fsck only. + */ +void btrfs_set_item_key_unsafe(struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *new_key) +{ + struct btrfs_disk_key disk_key; + struct extent_buffer *eb; + int slot; + + eb = path->nodes[0]; + slot = path->slots[0]; + + btrfs_cpu_key_to_disk(&disk_key, new_key); + btrfs_set_item_key(eb, &disk_key, slot); + btrfs_mark_buffer_dirty(eb); + if (slot == 0) + btrfs_fixup_low_keys(root, path, &disk_key, 1); +} + +/* * try to push data from one node into the next node left in the * tree. * @@ -1278,8 +1330,6 @@ static int push_node_left(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(src); btrfs_mark_buffer_dirty(dst); - ret = btrfs_update_ref(trans, root, src, dst, dst_nritems, push_items); - BUG_ON(ret); return ret; } @@ -1342,8 +1392,6 @@ static int balance_node_right(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(src); btrfs_mark_buffer_dirty(dst); - ret = btrfs_update_ref(trans, root, src, dst, 0, push_items); - BUG_ON(ret); return ret; } @@ -1363,7 +1411,6 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans, struct extent_buffer *c; struct extent_buffer *old; struct btrfs_disk_key lower_key; - int ret; BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); @@ -1374,26 +1421,26 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans, else btrfs_node_key(lower, &lower_key, 0); - c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, - root->root_key.objectid, - trans->transid, level, - root->node->start, 0); + c = btrfs_alloc_free_block(trans, root, root->fs_info->nodesize, + root->root_key.objectid, &lower_key, + level, root->node->start, 0); + if (IS_ERR(c)) return PTR_ERR(c); - memset_extent_buffer(c, 0, 0, root->nodesize); + memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); btrfs_set_header_nritems(c, 1); btrfs_set_header_level(c, level); btrfs_set_header_bytenr(c, c->start); btrfs_set_header_generation(c, trans->transid); + btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(c, root->root_key.objectid); write_extent_buffer(c, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(c), - BTRFS_FSID_SIZE); + btrfs_header_fsid(), BTRFS_FSID_SIZE); write_extent_buffer(c, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(c), + btrfs_header_chunk_tree_uuid(c), BTRFS_UUID_SIZE); btrfs_set_node_key(c, &lower_key, 0); @@ -1408,12 +1455,6 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans, old = root->node; root->node = c; - ret = btrfs_update_extent_ref(trans, root, lower->start, - lower->start, c->start, - root->root_key.objectid, - trans->transid, level - 1, 0); - BUG_ON(ret); - /* the super has an extra ref to root->node */ free_extent_buffer(old); @@ -1447,7 +1488,8 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root BUG(); if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root)) BUG(); - if (slot != nritems) { + if (slot < nritems) { + /* shift the items */ memmove_extent_buffer(lower, btrfs_node_key_ptr_offset(slot + 1), btrfs_node_key_ptr_offset(slot), @@ -1500,29 +1542,27 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root } c_nritems = btrfs_header_nritems(c); + mid = (c_nritems + 1) / 2; + btrfs_node_key(c, &disk_key, mid); - btrfs_node_key(c, &disk_key, 0); - split = btrfs_alloc_free_block(trans, root, root->nodesize, - path->nodes[level + 1]->start, - root->root_key.objectid, - trans->transid, level, c->start, 0); + split = btrfs_alloc_free_block(trans, root, root->fs_info->nodesize, + root->root_key.objectid, + &disk_key, level, c->start, 0); if (IS_ERR(split)) return PTR_ERR(split); - btrfs_set_header_flags(split, btrfs_header_flags(c)); + memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header)); btrfs_set_header_level(split, btrfs_header_level(c)); btrfs_set_header_bytenr(split, split->start); btrfs_set_header_generation(split, trans->transid); + btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(split, root->root_key.objectid); - btrfs_set_header_flags(split, 0); write_extent_buffer(split, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(split), - BTRFS_FSID_SIZE); + btrfs_header_fsid(), BTRFS_FSID_SIZE); write_extent_buffer(split, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(split), + btrfs_header_chunk_tree_uuid(split), BTRFS_UUID_SIZE); - mid = (c_nritems + 1) / 2; copy_extent_buffer(split, c, btrfs_node_key_ptr_offset(0), @@ -1535,16 +1575,12 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(c); btrfs_mark_buffer_dirty(split); - btrfs_node_key(split, &disk_key, 0); wret = insert_ptr(trans, root, path, &disk_key, split->start, path->slots[level + 1] + 1, level + 1); if (wret) ret = wret; - ret = btrfs_update_ref(trans, root, c, split, 0, c_nritems - mid); - BUG_ON(ret); - if (path->slots[level] >= mid) { path->slots[level] -= mid; free_extent_buffer(c); @@ -1583,13 +1619,14 @@ static int leaf_space_used(struct extent_buffer *l, int start, int nr) */ int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf) { + u32 nodesize = (root ? BTRFS_LEAF_DATA_SIZE(root) : leaf->len); int nritems = btrfs_header_nritems(leaf); int ret; - ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); + ret = nodesize - leaf_space_used(leaf, 0, nritems); if (ret < 0) { - printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n", - ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root), - leaf_space_used(leaf, 0, nritems), nritems); + printk("leaf free space ret %d, leaf data size %u, used %d nritems %d\n", + ret, nodesize, leaf_space_used(leaf, 0, nritems), + nritems); } return ret; } @@ -1609,6 +1646,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root struct extent_buffer *right; struct extent_buffer *upper; struct btrfs_disk_key disk_key; + struct btrfs_fs_info *fs_info = root->fs_info; int slot; u32 i; int free_space; @@ -1630,9 +1668,14 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root if (slot >= btrfs_header_nritems(upper) - 1) return 1; - right = read_node_slot(root, upper, slot + 1); + right = read_node_slot(fs_info, upper, slot + 1); + if (!extent_buffer_uptodate(right)) { + if (IS_ERR(right)) + return PTR_ERR(right); + return -EIO; + } free_space = btrfs_leaf_free_space(root, right); - if (free_space < data_size + sizeof(struct btrfs_item)) { + if (free_space < data_size) { free_extent_buffer(right); return 1; } @@ -1645,7 +1688,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } free_space = btrfs_leaf_free_space(root, right); - if (free_space < data_size + sizeof(struct btrfs_item)) { + if (free_space < data_size) { free_extent_buffer(right); return 1; } @@ -1663,7 +1706,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root i = left_nritems - 1; while (i >= nr) { - item = btrfs_item_nr(left, i); + item = btrfs_item_nr(i); if (path->slots[0] == i) push_space += data_size + sizeof(*item); @@ -1719,7 +1762,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(right, right_nritems); push_space = BTRFS_LEAF_DATA_SIZE(root); for (i = 0; i < right_nritems; i++) { - item = btrfs_item_nr(right, i); + item = btrfs_item_nr(i); push_space -= btrfs_item_size(right, item); btrfs_set_item_offset(right, item, push_space); } @@ -1735,9 +1778,6 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_node_key(upper, &disk_key, slot + 1); btrfs_mark_buffer_dirty(upper); - ret = btrfs_update_ref(trans, root, left, right, 0, push_items); - BUG_ON(ret); - /* then fixup the leaf pointer in the path */ if (path->slots[0] >= left_nritems) { path->slots[0] -= left_nritems; @@ -1760,6 +1800,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_disk_key disk_key; struct extent_buffer *right = path->nodes[0]; struct extent_buffer *left; + struct btrfs_fs_info *fs_info = root->fs_info; int slot; int i; int free_space; @@ -1770,7 +1811,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root u32 right_nritems; u32 nr; int ret = 0; - int wret; u32 this_item_size; u32 old_left_item_size; @@ -1785,9 +1825,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } - left = read_node_slot(root, path->nodes[1], slot - 1); + left = read_node_slot(fs_info, path->nodes[1], slot - 1); free_space = btrfs_leaf_free_space(root, left); - if (free_space < data_size + sizeof(struct btrfs_item)) { + if (free_space < data_size) { free_extent_buffer(left); return 1; } @@ -1802,7 +1842,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root } free_space = btrfs_leaf_free_space(root, left); - if (free_space < data_size + sizeof(struct btrfs_item)) { + if (free_space < data_size) { free_extent_buffer(left); return 1; } @@ -1813,7 +1853,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root nr = right_nritems - 1; for (i = 0; i < nr; i++) { - item = btrfs_item_nr(right, i); + item = btrfs_item_nr(i); if (path->slots[0] == i) push_space += data_size + sizeof(*item); @@ -1848,13 +1888,13 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_item_offset_nr(right, push_items - 1), push_space); old_left_nritems = btrfs_header_nritems(left); - BUG_ON(old_left_nritems < 0); + BUG_ON(old_left_nritems == 0); old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1); for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { u32 ioff; - item = btrfs_item_nr(left, i); + item = btrfs_item_nr(i); ioff = btrfs_item_offset(left, item); btrfs_set_item_offset(left, item, ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size)); @@ -1884,7 +1924,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(right, right_nritems); push_space = BTRFS_LEAF_DATA_SIZE(root); for (i = 0; i < right_nritems; i++) { - item = btrfs_item_nr(right, i); + item = btrfs_item_nr(i); push_space = push_space - btrfs_item_size(right, item); btrfs_set_item_offset(right, item, push_space); } @@ -1894,13 +1934,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(right); btrfs_item_key(right, &disk_key, 0); - wret = fixup_low_keys(trans, root, path, &disk_key, 1); - if (wret) - ret = wret; - - ret = btrfs_update_ref(trans, root, right, left, - old_left_nritems, push_items); - BUG_ON(ret); + btrfs_fixup_low_keys(root, path, &disk_key, 1); /* then fixup the leaf pointer in the path */ if (path->slots[0] < push_items) { @@ -1922,34 +1956,102 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root * * returns 0 if all went well and < 0 on failure. */ -static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_key *ins_key, - struct btrfs_path *path, int data_size, int extend) +static noinline int copy_for_split(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *l, + struct extent_buffer *right, + int slot, int mid, int nritems) { + int data_copy_size; + int rt_data_off; + int i; + int ret = 0; + int wret; + struct btrfs_disk_key disk_key; + + nritems = nritems - mid; + btrfs_set_header_nritems(right, nritems); + data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l); + + copy_extent_buffer(right, l, btrfs_item_nr_offset(0), + btrfs_item_nr_offset(mid), + nritems * sizeof(struct btrfs_item)); + + copy_extent_buffer(right, l, + btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - + data_copy_size, btrfs_leaf_data(l) + + leaf_data_end(root, l), data_copy_size); + + rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - + btrfs_item_end_nr(l, mid); + + for (i = 0; i < nritems; i++) { + struct btrfs_item *item = btrfs_item_nr(i); + u32 ioff = btrfs_item_offset(right, item); + btrfs_set_item_offset(right, item, ioff + rt_data_off); + } + + btrfs_set_header_nritems(l, mid); + ret = 0; + btrfs_item_key(right, &disk_key, 0); + wret = insert_ptr(trans, root, path, &disk_key, right->start, + path->slots[1] + 1, 1); + if (wret) + ret = wret; + + btrfs_mark_buffer_dirty(right); + btrfs_mark_buffer_dirty(l); + BUG_ON(path->slots[0] != slot); + + if (mid <= slot) { + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; + path->slots[0] -= mid; + path->slots[1] += 1; + } else { + free_extent_buffer(right); + } + + BUG_ON(path->slots[0] < 0); + + return ret; +} + +/* + * split the path's leaf in two, making sure there is at least data_size + * available for the resulting leaf level of the path. + * + * returns 0 if all went well and < 0 on failure. + */ +static noinline int split_leaf(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_key *ins_key, + struct btrfs_path *path, int data_size, + int extend) +{ + struct btrfs_disk_key disk_key; struct extent_buffer *l; u32 nritems; int mid; int slot; struct extent_buffer *right; - int space_needed = data_size + sizeof(struct btrfs_item); - int data_copy_size; - int rt_data_off; - int i; int ret = 0; int wret; - int double_split; + int split; int num_doubles = 0; - struct btrfs_disk_key disk_key; - if (extend) - space_needed = data_size; + l = path->nodes[0]; + slot = path->slots[0]; + if (extend && data_size + btrfs_item_size_nr(l, slot) + + sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root)) + return -EOVERFLOW; /* first try to make some room by pushing left and right */ - if (ins_key->type != BTRFS_DIR_ITEM_KEY) { + if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { wret = push_leaf_right(trans, root, path, data_size, 0); - if (wret < 0) { + if (wret < 0) return wret; - } if (wret) { wret = push_leaf_left(trans, root, path, data_size, 0); if (wret < 0) @@ -1958,7 +2060,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root l = path->nodes[0]; /* did the pushes work? */ - if (btrfs_leaf_free_space(root, l) >= space_needed) + if (btrfs_leaf_free_space(root, l) >= data_size) return 0; } @@ -1968,155 +2070,236 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } again: - double_split = 0; + split = 1; l = path->nodes[0]; slot = path->slots[0]; nritems = btrfs_header_nritems(l); - mid = (nritems + 1)/ 2; - - right = btrfs_alloc_free_block(trans, root, root->leafsize, - path->nodes[1]->start, - root->root_key.objectid, - trans->transid, 0, l->start, 0); - if (IS_ERR(right)) { - BUG_ON(1); - return PTR_ERR(right); - } + mid = (nritems + 1) / 2; - memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); - btrfs_set_header_bytenr(right, right->start); - btrfs_set_header_generation(right, trans->transid); - btrfs_set_header_owner(right, root->root_key.objectid); - btrfs_set_header_level(right, 0); - write_extent_buffer(right, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(right), - BTRFS_FSID_SIZE); - - write_extent_buffer(right, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(right), - BTRFS_UUID_SIZE); if (mid <= slot) { if (nritems == 1 || - leaf_space_used(l, mid, nritems - mid) + space_needed > + leaf_space_used(l, mid, nritems - mid) + data_size > BTRFS_LEAF_DATA_SIZE(root)) { if (slot >= nritems) { - btrfs_cpu_key_to_disk(&disk_key, ins_key); - btrfs_set_header_nritems(right, 0); - wret = insert_ptr(trans, root, path, - &disk_key, right->start, - path->slots[1] + 1, 1); - if (wret) - ret = wret; - free_extent_buffer(path->nodes[0]); - path->nodes[0] = right; - path->slots[0] = 0; - path->slots[1] += 1; - return ret; - } - mid = slot; - if (mid != nritems && - leaf_space_used(l, mid, nritems - mid) + - space_needed > BTRFS_LEAF_DATA_SIZE(root)) { - double_split = 1; + split = 0; + } else { + mid = slot; + if (mid != nritems && + leaf_space_used(l, mid, nritems - mid) + + data_size > BTRFS_LEAF_DATA_SIZE(root)) { + split = 2; + } } } } else { - if (leaf_space_used(l, 0, mid + 1) + space_needed > + if (leaf_space_used(l, 0, mid) + data_size > BTRFS_LEAF_DATA_SIZE(root)) { - if (!extend && slot == 0) { - btrfs_cpu_key_to_disk(&disk_key, ins_key); - btrfs_set_header_nritems(right, 0); - wret = insert_ptr(trans, root, path, - &disk_key, - right->start, - path->slots[1], 1); - if (wret) - ret = wret; - free_extent_buffer(path->nodes[0]); - path->nodes[0] = right; - path->slots[0] = 0; - if (path->slots[1] == 0) { - wret = fixup_low_keys(trans, root, - path, &disk_key, 1); - if (wret) - ret = wret; - } - return ret; - } else if (extend && slot == 0) { + if (!extend && data_size && slot == 0) { + split = 0; + } else if ((extend || !data_size) && slot == 0) { mid = 1; } else { mid = slot; if (mid != nritems && leaf_space_used(l, mid, nritems - mid) + - space_needed > BTRFS_LEAF_DATA_SIZE(root)) { - double_split = 1; + data_size > BTRFS_LEAF_DATA_SIZE(root)) { + split = 2 ; } } } } - nritems = nritems - mid; - btrfs_set_header_nritems(right, nritems); - data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l); + + if (split == 0) + btrfs_cpu_key_to_disk(&disk_key, ins_key); + else + btrfs_item_key(l, &disk_key, mid); - copy_extent_buffer(right, l, btrfs_item_nr_offset(0), - btrfs_item_nr_offset(mid), - nritems * sizeof(struct btrfs_item)); + right = btrfs_alloc_free_block(trans, root, root->fs_info->nodesize, + root->root_key.objectid, + &disk_key, 0, l->start, 0); + if (IS_ERR(right)) { + BUG_ON(1); + return PTR_ERR(right); + } - copy_extent_buffer(right, l, - btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - - data_copy_size, btrfs_leaf_data(l) + - leaf_data_end(root, l), data_copy_size); + memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); + btrfs_set_header_bytenr(right, right->start); + btrfs_set_header_generation(right, trans->transid); + btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV); + btrfs_set_header_owner(right, root->root_key.objectid); + btrfs_set_header_level(right, 0); + write_extent_buffer(right, root->fs_info->fsid, + btrfs_header_fsid(), BTRFS_FSID_SIZE); - rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - - btrfs_item_end_nr(l, mid); + write_extent_buffer(right, root->fs_info->chunk_tree_uuid, + btrfs_header_chunk_tree_uuid(right), + BTRFS_UUID_SIZE); - for (i = 0; i < nritems; i++) { - struct btrfs_item *item = btrfs_item_nr(right, i); - u32 ioff = btrfs_item_offset(right, item); - btrfs_set_item_offset(right, item, ioff + rt_data_off); + if (split == 0) { + if (mid <= slot) { + btrfs_set_header_nritems(right, 0); + wret = insert_ptr(trans, root, path, + &disk_key, right->start, + path->slots[1] + 1, 1); + if (wret) + ret = wret; + + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; + path->slots[0] = 0; + path->slots[1] += 1; + } else { + btrfs_set_header_nritems(right, 0); + wret = insert_ptr(trans, root, path, + &disk_key, + right->start, + path->slots[1], 1); + if (wret) + ret = wret; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; + path->slots[0] = 0; + if (path->slots[1] == 0) { + btrfs_fixup_low_keys(root, path, + &disk_key, 1); + } + } + btrfs_mark_buffer_dirty(right); + return ret; } - btrfs_set_header_nritems(l, mid); - ret = 0; - btrfs_item_key(right, &disk_key, 0); - wret = insert_ptr(trans, root, path, &disk_key, right->start, - path->slots[1] + 1, 1); - if (wret) - ret = wret; + ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems); + BUG_ON(ret); - btrfs_mark_buffer_dirty(right); - btrfs_mark_buffer_dirty(l); - BUG_ON(path->slots[0] != slot); + if (split == 2) { + BUG_ON(num_doubles != 0); + num_doubles++; + goto again; + } - ret = btrfs_update_ref(trans, root, l, right, 0, nritems); + return ret; +} + +/* + * This function splits a single item into two items, + * giving 'new_key' to the new item and splitting the + * old one at split_offset (from the start of the item). + * + * The path may be released by this operation. After + * the split, the path is pointing to the old item. The + * new item is going to be in the same node as the old one. + * + * Note, the item being split must be smaller enough to live alone on + * a tree block with room for one extra struct btrfs_item + * + * This allows us to split the item in place, keeping a lock on the + * leaf the entire time. + */ +int btrfs_split_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *new_key, + unsigned long split_offset) +{ + u32 item_size; + struct extent_buffer *leaf; + struct btrfs_key orig_key; + struct btrfs_item *item; + struct btrfs_item *new_item; + int ret = 0; + int slot; + u32 nritems; + u32 orig_offset; + struct btrfs_disk_key disk_key; + char *buf; + + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]); + if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item)) + goto split; + + item_size = btrfs_item_size_nr(leaf, path->slots[0]); + btrfs_release_path(path); + + path->search_for_split = 1; + + ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1); + path->search_for_split = 0; + + /* if our item isn't there or got smaller, return now */ + if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0], + path->slots[0])) { + return -EAGAIN; + } + + ret = split_leaf(trans, root, &orig_key, path, 0, 0); BUG_ON(ret); - if (mid <= slot) { - free_extent_buffer(path->nodes[0]); - path->nodes[0] = right; - path->slots[0] -= mid; - path->slots[1] += 1; - } else - free_extent_buffer(right); + BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); + leaf = path->nodes[0]; - BUG_ON(path->slots[0] < 0); +split: + item = btrfs_item_nr(path->slots[0]); + orig_offset = btrfs_item_offset(leaf, item); + item_size = btrfs_item_size(leaf, item); - if (double_split) { - BUG_ON(num_doubles != 0); - num_doubles++; - goto again; + + buf = kmalloc(item_size, GFP_NOFS); + BUG_ON(!buf); + read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, + path->slots[0]), item_size); + slot = path->slots[0] + 1; + leaf = path->nodes[0]; + + nritems = btrfs_header_nritems(leaf); + + if (slot < nritems) { + /* shift the items */ + memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), + btrfs_item_nr_offset(slot), + (nritems - slot) * sizeof(struct btrfs_item)); + + } + + btrfs_cpu_key_to_disk(&disk_key, new_key); + btrfs_set_item_key(leaf, &disk_key, slot); + + new_item = btrfs_item_nr(slot); + + btrfs_set_item_offset(leaf, new_item, orig_offset); + btrfs_set_item_size(leaf, new_item, item_size - split_offset); + + btrfs_set_item_offset(leaf, item, + orig_offset + item_size - split_offset); + btrfs_set_item_size(leaf, item, split_offset); + + btrfs_set_header_nritems(leaf, nritems + 1); + + /* write the data for the start of the original item */ + write_extent_buffer(leaf, buf, + btrfs_item_ptr_offset(leaf, path->slots[0]), + split_offset); + + /* write the data for the new item */ + write_extent_buffer(leaf, buf + split_offset, + btrfs_item_ptr_offset(leaf, slot), + item_size - split_offset); + btrfs_mark_buffer_dirty(leaf); + + ret = 0; + if (btrfs_leaf_free_space(root, leaf) < 0) { + btrfs_print_leaf(root, leaf); + BUG(); } + kfree(buf); return ret; } -int btrfs_truncate_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, +int btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path, u32 new_size, int from_end) { int ret = 0; int slot; - int slot_orig; struct extent_buffer *leaf; struct btrfs_item *item; u32 nritems; @@ -2126,7 +2309,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, unsigned int size_diff; int i; - slot_orig = path->slots[0]; leaf = path->nodes[0]; slot = path->slots[0]; @@ -2150,7 +2332,7 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, /* first correct the data pointers */ for (i = slot; i < nritems; i++) { u32 ioff; - item = btrfs_item_nr(leaf, i); + item = btrfs_item_nr(i); ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff + size_diff); } @@ -2193,10 +2375,10 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, btrfs_set_disk_key_offset(&disk_key, offset + size_diff); btrfs_set_item_key(leaf, &disk_key, slot); if (slot == 0) - fixup_low_keys(trans, root, path, &disk_key, 1); + btrfs_fixup_low_keys(root, path, &disk_key, 1); } - item = btrfs_item_nr(leaf, slot); + item = btrfs_item_nr(slot); btrfs_set_item_size(leaf, item, new_size); btrfs_mark_buffer_dirty(leaf); @@ -2208,13 +2390,11 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, return ret; } -int btrfs_extend_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_path *path, +int btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path, u32 data_size) { int ret = 0; int slot; - int slot_orig; struct extent_buffer *leaf; struct btrfs_item *item; u32 nritems; @@ -2223,7 +2403,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, unsigned int old_size; int i; - slot_orig = path->slots[0]; leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); @@ -2249,7 +2428,7 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, /* first correct the data pointers */ for (i = slot; i < nritems; i++) { u32 ioff; - item = btrfs_item_nr(leaf, i); + item = btrfs_item_nr(i); ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff - data_size); } @@ -2261,7 +2440,7 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, data_end = old_data; old_size = btrfs_item_size_nr(leaf, slot); - item = btrfs_item_nr(leaf, slot); + item = btrfs_item_nr(slot); btrfs_set_item_size(leaf, item, old_size + data_size); btrfs_mark_buffer_dirty(leaf); @@ -2287,7 +2466,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, struct btrfs_item *item; int ret = 0; int slot; - int slot_orig; int i; u32 nritems; u32 total_size = 0; @@ -2303,7 +2481,7 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, if (!root->node) BUG(); - total_size = total_data + (nr - 1) * sizeof(struct btrfs_item); + total_size = total_data + nr * sizeof(struct btrfs_item); ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); if (ret == 0) { return -EEXIST; @@ -2311,14 +2489,12 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, if (ret < 0) goto out; - slot_orig = path->slots[0]; leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(root, leaf); - if (btrfs_leaf_free_space(root, leaf) < - sizeof(struct btrfs_item) + total_size) { + if (btrfs_leaf_free_space(root, leaf) < total_size) { btrfs_print_leaf(root, leaf); printk("not enough freespace need %u have %d\n", total_size, btrfs_leaf_free_space(root, leaf)); @@ -2328,8 +2504,7 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, slot = path->slots[0]; BUG_ON(slot < 0); - if (slot != nritems) { - int i; + if (slot < nritems) { unsigned int old_data = btrfs_item_end_nr(leaf, slot); if (old_data < data_end) { @@ -2345,7 +2520,7 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, for (i = slot; i < nritems; i++) { u32 ioff; - item = btrfs_item_nr(leaf, i); + item = btrfs_item_nr(i); ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff - total_data); } @@ -2366,7 +2541,7 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, for (i = 0; i < nr; i++) { btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); btrfs_set_item_key(leaf, &disk_key, slot + i); - item = btrfs_item_nr(leaf, slot + i); + item = btrfs_item_nr(slot + i); btrfs_set_item_offset(leaf, item, data_end - data_size[i]); data_end -= data_size[i]; btrfs_set_item_size(leaf, item, data_size[i]); @@ -2377,7 +2552,7 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, ret = 0; if (slot == 0) { btrfs_cpu_key_to_disk(&disk_key, cpu_key); - ret = fixup_low_keys(trans, root, path, &disk_key, 1); + btrfs_fixup_low_keys(root, path, &disk_key, 1); } if (btrfs_leaf_free_space(root, leaf) < 0) { @@ -2403,7 +2578,9 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root unsigned long ptr; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; + ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); if (!ret) { leaf = path->nodes[0]; @@ -2422,16 +2599,16 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root * continuing all the way the root if required. The root is converted into * a leaf if all the nodes are emptied. */ -static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_path *path, int level, int slot) +int btrfs_del_ptr(struct btrfs_root *root, struct btrfs_path *path, + int level, int slot) { struct extent_buffer *parent = path->nodes[level]; u32 nritems; int ret = 0; - int wret; nritems = btrfs_header_nritems(parent); - if (slot != nritems -1) { + if (slot < nritems - 1) { + /* shift the items */ memmove_extent_buffer(parent, btrfs_node_key_ptr_offset(slot), btrfs_node_key_ptr_offset(slot + 1), @@ -2448,15 +2625,40 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_disk_key disk_key; btrfs_node_key(parent, &disk_key, 0); - wret = fixup_low_keys(trans, root, path, &disk_key, level + 1); - if (wret) - ret = wret; + btrfs_fixup_low_keys(root, path, &disk_key, level + 1); } btrfs_mark_buffer_dirty(parent); return ret; } /* + * a helper function to delete the leaf pointed to by path->slots[1] and + * path->nodes[1]. + * + * This deletes the pointer in path->nodes[1] and frees the leaf + * block extent. zero is returned if it all worked out, < 0 otherwise. + * + * The path must have already been setup for deleting the leaf, including + * all the proper balancing. path->nodes[1] must be locked. + */ +static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *leaf) +{ + int ret; + + WARN_ON(btrfs_header_generation(leaf) != trans->transid); + ret = btrfs_del_ptr(root, path, 1, path->slots[1]); + if (ret) + return ret; + + ret = btrfs_free_extent(trans, root, leaf->start, leaf->len, + 0, root->root_key.objectid, 0, 0); + return ret; +} + +/* * delete the item at the leaf level in path. If that empties * the leaf, remove it from the tree */ @@ -2481,7 +2683,6 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, nritems = btrfs_header_nritems(leaf); if (slot + nr != nritems) { - int i; int data_end = leaf_data_end(root, leaf); memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + @@ -2492,7 +2693,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, for (i = slot + nr; i < nritems; i++) { u32 ioff; - item = btrfs_item_nr(leaf, i); + item = btrfs_item_nr(i); ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff + dsize); } @@ -2510,17 +2711,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (leaf == root->node) { btrfs_set_header_level(leaf, 0); } else { - u64 root_gen = btrfs_header_generation(path->nodes[1]); clean_tree_block(trans, root, leaf); - wait_on_tree_block_writeback(root, leaf); - wret = del_ptr(trans, root, path, 1, path->slots[1]); - if (wret) - ret = wret; - wret = btrfs_free_extent(trans, root, - leaf->start, leaf->len, - path->nodes[1]->start, - btrfs_header_owner(path->nodes[1]), - root_gen, 0, 0, 1); + wret = btrfs_del_leaf(trans, root, path, leaf); + BUG_ON(ret); if (wret) ret = wret; } @@ -2530,10 +2723,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_disk_key disk_key; btrfs_item_key(leaf, &disk_key, 0); - wret = fixup_low_keys(trans, root, path, - &disk_key, 1); - if (wret) - ret = wret; + btrfs_fixup_low_keys(root, path, &disk_key, 1); } /* delete the leaf if it is mostly empty */ @@ -2557,27 +2747,12 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, } if (btrfs_header_nritems(leaf) == 0) { - u64 root_gen; - u64 bytenr = leaf->start; - u32 blocksize = leaf->len; - - root_gen = btrfs_header_generation( - path->nodes[1]); - clean_tree_block(trans, root, leaf); - wait_on_tree_block_writeback(root, leaf); - - wret = del_ptr(trans, root, path, 1, slot); - if (wret) - ret = wret; - + path->slots[1] = slot; + ret = btrfs_del_leaf(trans, root, path, leaf); + BUG_ON(ret); free_extent_buffer(leaf); - wret = btrfs_free_extent(trans, root, bytenr, - blocksize, path->nodes[1]->start, - btrfs_header_owner(path->nodes[1]), - root_gen, 0, 0, 1); - if (wret) - ret = wret; + } else { btrfs_mark_buffer_dirty(leaf); free_extent_buffer(leaf); @@ -2600,6 +2775,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) int level = 1; struct extent_buffer *c; struct extent_buffer *next = NULL; + struct btrfs_fs_info *fs_info = root->fs_info; while(level < BTRFS_MAX_LEVEL) { if (!path->nodes[level]) @@ -2615,10 +2791,12 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) } slot--; - if (next) - free_extent_buffer(next); - - next = read_node_slot(root, c, slot); + next = read_node_slot(fs_info, c, slot); + if (!extent_buffer_uptodate(next)) { + if (IS_ERR(next)) + return PTR_ERR(next); + return -EIO; + } break; } path->slots[level] = slot; @@ -2633,7 +2811,12 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) path->slots[level] = slot; if (!level) break; - next = read_node_slot(root, next, slot); + next = read_node_slot(fs_info, next, slot); + if (!extent_buffer_uptodate(next)) { + if (IS_ERR(next)) + return PTR_ERR(next); + return -EIO; + } } return 0; } @@ -2649,6 +2832,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) int level = 1; struct extent_buffer *c; struct extent_buffer *next = NULL; + struct btrfs_fs_info *fs_info = root->fs_info; while(level < BTRFS_MAX_LEVEL) { if (!path->nodes[level]) @@ -2663,13 +2847,12 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) continue; } - if (next) - free_extent_buffer(next); - if (path->reada) reada_for_search(root, path, level, slot, 0); - next = read_node_slot(root, c, slot); + next = read_node_slot(fs_info, c, slot); + if (!extent_buffer_uptodate(next)) + return -EIO; break; } path->slots[level] = slot; @@ -2683,7 +2866,9 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) break; if (path->reada) reada_for_search(root, path, level, 0, 0); - next = read_node_slot(root, next, 0); + next = read_node_slot(fs_info, next, 0); + if (!extent_buffer_uptodate(next)) + return -EIO; } return 0; } @@ -2694,6 +2879,7 @@ int btrfs_previous_item(struct btrfs_root *root, { struct btrfs_key found_key; struct extent_buffer *leaf; + u32 nritems; int ret; while(1) { @@ -2705,10 +2891,86 @@ int btrfs_previous_item(struct btrfs_root *root, path->slots[0]--; } leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + if (nritems == 0) + return 1; + if (path->slots[0] == nritems) + path->slots[0]--; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.objectid < min_objectid) + break; if (found_key.type == type) return 0; + if (found_key.objectid == min_objectid && + found_key.type < type) + break; + } + return 1; +} + +/* + * search in extent tree to find a previous Metadata/Data extent item with + * min objecitd. + * + * returns 0 if something is found, 1 if nothing was found and < 0 on error + */ +int btrfs_previous_extent_item(struct btrfs_root *root, + struct btrfs_path *path, u64 min_objectid) +{ + struct btrfs_key found_key; + struct extent_buffer *leaf; + u32 nritems; + int ret; + + while (1) { + if (path->slots[0] == 0) { + ret = btrfs_prev_leaf(root, path); + if (ret != 0) + return ret; + } else { + path->slots[0]--; + } + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + if (nritems == 0) + return 1; + if (path->slots[0] == nritems) + path->slots[0]--; + + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.objectid < min_objectid) + break; + if (found_key.type == BTRFS_EXTENT_ITEM_KEY || + found_key.type == BTRFS_METADATA_ITEM_KEY) + return 0; + if (found_key.objectid == min_objectid && + found_key.type < BTRFS_EXTENT_ITEM_KEY) + break; } return 1; } +/* + * Search in extent tree to found next meta/data extent + * Caller needs to check for no-hole or skinny metadata features. + */ +int btrfs_next_extent_item(struct btrfs_root *root, + struct btrfs_path *path, u64 max_objectid) +{ + struct btrfs_key found_key; + int ret; + + while (1) { + ret = btrfs_next_item(root, path); + if (ret) + return ret; + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + if (found_key.objectid > max_objectid) + return 1; + if (found_key.type == BTRFS_EXTENT_ITEM_KEY || + found_key.type == BTRFS_METADATA_ITEM_KEY) + return 0; + } +}