Btrfs-progs: bugfix for subvolume parent determination in btrfs send
[platform/upstream/btrfs-progs.git] / ctree.c
diff --git a/ctree.c b/ctree.c
index 925cafa..2d86b1e 100644 (file)
--- a/ctree.c
+++ b/ctree.c
@@ -19,6 +19,7 @@
 #include "disk-io.h"
 #include "transaction.h"
 #include "print-tree.h"
+#include "repair.h"
 
 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
                      *root, struct btrfs_path *path, int level);
@@ -32,8 +33,6 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              struct extent_buffer *dst_buf,
                              struct extent_buffer *src_buf);
-static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-                  struct btrfs_path *path, int level, int slot);
 
 inline void btrfs_init_path(struct btrfs_path *p)
 {
@@ -62,7 +61,7 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
        int i;
        for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
                if (!p->nodes[i])
-                       break;
+                       continue;
                free_extent_buffer(p->nodes[i]);
        }
        memset(p, 0, sizeof(*p));
@@ -85,6 +84,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
        int ret = 0;
        int level;
        struct btrfs_root *new_root;
+       struct btrfs_disk_key disk_key;
 
        new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
        if (!new_root)
@@ -98,8 +98,12 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
        WARN_ON(root->ref_cows && trans->transid != root->last_trans);
 
        level = btrfs_header_level(buf);
-       cow = btrfs_alloc_free_block(trans, new_root, buf->len, 0,
-                                    new_root_objectid, trans->transid,
+       if (level == 0)
+               btrfs_item_key(buf, &disk_key, 0);
+       else
+               btrfs_node_key(buf, &disk_key, 0);
+       cow = btrfs_alloc_free_block(trans, new_root, buf->len,
+                                    new_root_objectid, &disk_key,
                                     level, buf->start, 0);
        if (IS_ERR(cow)) {
                kfree(new_root);
@@ -109,11 +113,20 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
        copy_extent_buffer(cow, buf, 0, 0, cow->len);
        btrfs_set_header_bytenr(cow, cow->start);
        btrfs_set_header_generation(cow, trans->transid);
-       btrfs_set_header_owner(cow, new_root_objectid);
-       btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
+       btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
+       btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
+                                    BTRFS_HEADER_FLAG_RELOC);
+       if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
+               btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
+       else
+               btrfs_set_header_owner(cow, new_root_objectid);
+
+       write_extent_buffer(cow, root->fs_info->fsid,
+                           (unsigned long)btrfs_header_fsid(cow),
+                           BTRFS_FSID_SIZE);
 
        WARN_ON(btrfs_header_generation(buf) > trans->transid);
-       ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL);
+       ret = btrfs_inc_ref(trans, new_root, cow, 0);
        kfree(new_root);
 
        if (ret)
@@ -124,6 +137,165 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
        return 0;
 }
 
+int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
+                     struct btrfs_root *root)
+{
+       struct extent_buffer *c;
+       struct extent_buffer *old = root->node;
+       int level;
+       struct btrfs_disk_key disk_key = {0,0,0};
+
+       level = 0;
+
+       c = btrfs_alloc_free_block(trans, root,
+                                  btrfs_level_size(root, 0),
+                                  root->root_key.objectid,
+                                  &disk_key, level, 0, 0);
+       if (IS_ERR(c)) {
+               c = old;
+               extent_buffer_get(c);
+       }
+
+       memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
+       btrfs_set_header_level(c, level);
+       btrfs_set_header_bytenr(c, c->start);
+       btrfs_set_header_generation(c, trans->transid);
+       btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
+       btrfs_set_header_owner(c, root->root_key.objectid);
+
+       write_extent_buffer(c, root->fs_info->fsid,
+                           (unsigned long)btrfs_header_fsid(c),
+                           BTRFS_FSID_SIZE);
+
+       write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
+                           (unsigned long)btrfs_header_chunk_tree_uuid(c),
+                           BTRFS_UUID_SIZE);
+
+       btrfs_mark_buffer_dirty(c);
+
+       free_extent_buffer(old);
+       root->node = c;
+       add_root_to_dirty_list(root);
+       return 0;
+}
+
+/*
+ * check if the tree block can be shared by multiple trees
+ */
+int btrfs_block_can_be_shared(struct btrfs_root *root,
+                             struct extent_buffer *buf)
+{
+       /*
+        * Tree blocks not in reference counted trees and tree roots
+        * are never shared. If a block was allocated after the last
+        * snapshot and the block was not allocated by tree relocation,
+        * we know the block is not shared.
+        */
+       if (root->ref_cows &&
+           buf != root->node && buf != root->commit_root &&
+           (btrfs_header_generation(buf) <=
+            btrfs_root_last_snapshot(&root->root_item) ||
+            btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
+               return 1;
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+        if (root->ref_cows &&
+            btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
+                return 1;
+#endif
+       return 0;
+}
+
+static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
+                                      struct btrfs_root *root,
+                                      struct extent_buffer *buf,
+                                      struct extent_buffer *cow)
+{
+       u64 refs;
+       u64 owner;
+       u64 flags;
+       u64 new_flags = 0;
+       int ret;
+
+       /*
+        * Backrefs update rules:
+        *
+        * Always use full backrefs for extent pointers in tree block
+        * allocated by tree relocation.
+        *
+        * If a shared tree block is no longer referenced by its owner
+        * tree (btrfs_header_owner(buf) == root->root_key.objectid),
+        * use full backrefs for extent pointers in tree block.
+        *
+        * If a tree block is being relocated
+        * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
+        * use full backrefs for extent pointers in tree block.
+        * The reason for this is some operations (such as drop tree)
+        * are only allowed for blocks that use full backrefs.
+        */
+
+       if (btrfs_block_can_be_shared(root, buf)) {
+               ret = btrfs_lookup_extent_info(trans, root, buf->start,
+                                              buf->len, &refs, &flags);
+               BUG_ON(ret);
+               BUG_ON(refs == 0);
+       } else {
+               refs = 1;
+               if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
+                   btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
+                       flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
+               else
+                       flags = 0;
+       }
+
+       owner = btrfs_header_owner(buf);
+       BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) &&
+              owner == BTRFS_TREE_RELOC_OBJECTID);
+
+       if (refs > 1) {
+               if ((owner == root->root_key.objectid ||
+                    root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
+                   !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
+                       ret = btrfs_inc_ref(trans, root, buf, 1);
+                       BUG_ON(ret);
+
+                       if (root->root_key.objectid ==
+                           BTRFS_TREE_RELOC_OBJECTID) {
+                               ret = btrfs_dec_ref(trans, root, buf, 0);
+                               BUG_ON(ret);
+                               ret = btrfs_inc_ref(trans, root, cow, 1);
+                               BUG_ON(ret);
+                       }
+                       new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
+               } else {
+
+                       if (root->root_key.objectid ==
+                           BTRFS_TREE_RELOC_OBJECTID)
+                               ret = btrfs_inc_ref(trans, root, cow, 1);
+                       else
+                               ret = btrfs_inc_ref(trans, root, cow, 0);
+                       BUG_ON(ret);
+               }
+               if (new_flags != 0) {
+                       ret = btrfs_set_block_flags(trans, root, buf->start,
+                                                   buf->len, new_flags);
+                       BUG_ON(ret);
+               }
+       } else {
+               if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
+                       if (root->root_key.objectid ==
+                           BTRFS_TREE_RELOC_OBJECTID)
+                               ret = btrfs_inc_ref(trans, root, cow, 1);
+                       else
+                               ret = btrfs_inc_ref(trans, root, cow, 0);
+                       BUG_ON(ret);
+                       ret = btrfs_dec_ref(trans, root, buf, 1);
+                       BUG_ON(ret);
+               }
+               clean_tree_block(trans, root, buf);
+       }
+       return 0;
+}
+
 int __btrfs_cow_block(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root,
                             struct extent_buffer *buf,
@@ -131,26 +303,23 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans,
                             struct extent_buffer **cow_ret,
                             u64 search_start, u64 empty_size)
 {
-       u64 parent_start;
        struct extent_buffer *cow;
-       u32 nritems;
-       int ret = 0;
-       int different_trans = 0;
+       struct btrfs_disk_key disk_key;
        int level;
 
        WARN_ON(root->ref_cows && trans->transid !=
                root->fs_info->running_transaction->transid);
        WARN_ON(root->ref_cows && trans->transid != root->last_trans);
 
-       if (parent)
-               parent_start = parent->start;
+       level = btrfs_header_level(buf);
+
+       if (level == 0)
+               btrfs_item_key(buf, &disk_key, 0);
        else
-               parent_start = 0;
+               btrfs_node_key(buf, &disk_key, 0);
 
-       level = btrfs_header_level(buf);
-       nritems = btrfs_header_nritems(buf);
-       cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
-                                    root->root_key.objectid, trans->transid,
+       cow = btrfs_alloc_free_block(trans, root, buf->len,
+                                    root->root_key.objectid, &disk_key,
                                     level, search_start, empty_size);
        if (IS_ERR(cow))
                return PTR_ERR(cow);
@@ -158,32 +327,28 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans,
        copy_extent_buffer(cow, buf, 0, 0, cow->len);
        btrfs_set_header_bytenr(cow, cow->start);
        btrfs_set_header_generation(cow, trans->transid);
-       btrfs_set_header_owner(cow, root->root_key.objectid);
-       btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
+       btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
+       btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
+                                    BTRFS_HEADER_FLAG_RELOC);
+       if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
+               btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
+       else
+               btrfs_set_header_owner(cow, root->root_key.objectid);
+
+       write_extent_buffer(cow, root->fs_info->fsid,
+                           (unsigned long)btrfs_header_fsid(cow),
+                           BTRFS_FSID_SIZE);
 
        WARN_ON(btrfs_header_generation(buf) > trans->transid);
-       if (btrfs_header_generation(buf) != trans->transid) {
-               different_trans = 1;
-               ret = btrfs_inc_ref(trans, root, buf, cow, NULL);
-               if (ret)
-                       return ret;
-       } else {
-               ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems);
-               if (ret)
-                       return ret;
-               clean_tree_block(trans, root, buf);
-       }
+
+       update_ref_for_cow(trans, root, buf, cow);
 
        if (buf == root->node) {
                root->node = cow;
                extent_buffer_get(cow);
-               if (buf != root->commit_root) {
-                       btrfs_free_extent(trans, root, buf->start,
-                                         buf->len, buf->start,
-                                         root->root_key.objectid,
-                                         btrfs_header_generation(buf),
-                                         0, 0, 1);
-               }
+
+               btrfs_free_extent(trans, root, buf->start, buf->len,
+                                 0, root->root_key.objectid, level, 0);
                free_extent_buffer(buf);
                add_root_to_dirty_list(root);
        } else {
@@ -194,9 +359,9 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans,
                                              trans->transid);
                btrfs_mark_buffer_dirty(parent);
                WARN_ON(btrfs_header_generation(parent) != trans->transid);
+
                btrfs_free_extent(trans, root, buf->start, buf->len,
-                                 parent_start, btrfs_header_owner(parent),
-                                 btrfs_header_generation(parent), 0, 0, 1);
+                                 0, root->root_key.objectid, level, 1);
        }
        free_extent_buffer(buf);
        btrfs_mark_buffer_dirty(cow);
@@ -204,6 +369,18 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans,
        return 0;
 }
 
+static inline int should_cow_block(struct btrfs_trans_handle *trans,
+                                  struct btrfs_root *root,
+                                  struct extent_buffer *buf)
+{
+       if (btrfs_header_generation(buf) == trans->transid &&
+           !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
+           !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
+             btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
+               return 0;
+       return 1;
+}
+
 int btrfs_cow_block(struct btrfs_trans_handle *trans,
                    struct btrfs_root *root, struct extent_buffer *buf,
                    struct extent_buffer *parent, int parent_slot,
@@ -224,8 +401,7 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans,
                        (unsigned long long)root->fs_info->generation);
                WARN_ON(1);
        }
-       if (btrfs_header_generation(buf) == trans->transid &&
-           !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
+       if (!should_cow_block(trans, root, buf)) {
                *cow_ret = buf;
                return 0;
        }
@@ -414,156 +590,125 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root,
        return btrfs_item_offset_nr(leaf, nr - 1);
 }
 
-static int check_node(struct btrfs_root *root, struct btrfs_path *path,
-                     int level)
+int btrfs_check_node(struct btrfs_root *root,
+                     struct btrfs_disk_key *parent_key,
+                     struct extent_buffer *buf)
 {
-       struct extent_buffer *parent = NULL;
-       struct extent_buffer *node = path->nodes[level];
-       struct btrfs_disk_key parent_key;
-       struct btrfs_disk_key node_key;
-       int parent_slot;
-       int slot;
+       int i;
        struct btrfs_key cpukey;
-       u32 nritems = btrfs_header_nritems(node);
+       struct btrfs_disk_key key;
+       u32 nritems = btrfs_header_nritems(buf);
 
-       if (path->nodes[level + 1])
-               parent = path->nodes[level + 1];
+       if (nritems == 0 || nritems > BTRFS_NODEPTRS_PER_BLOCK(root))
+               goto fail;
 
-       slot = path->slots[level];
-       BUG_ON(nritems == 0);
-       if (parent) {
-               parent_slot = path->slots[level + 1];
-               btrfs_node_key(parent, &parent_key, parent_slot);
-               btrfs_node_key(node, &node_key, 0);
-               BUG_ON(memcmp(&parent_key, &node_key,
-                             sizeof(struct btrfs_disk_key)));
-               BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
-                      btrfs_header_bytenr(node));
-       }
-       BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
-       if (slot != 0) {
-               btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
-               btrfs_node_key(node, &node_key, slot);
-               BUG_ON(btrfs_comp_keys(&node_key, &cpukey) <= 0);
-       }
-       if (slot < nritems - 1) {
-               btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
-               btrfs_node_key(node, &node_key, slot);
-               BUG_ON(btrfs_comp_keys(&node_key, &cpukey) >= 0);
+       if (parent_key && parent_key->type) {
+               btrfs_node_key(buf, &key, 0);
+               if (memcmp(parent_key, &key, sizeof(key)))
+                       goto fail;
+       }
+       for (i = 0; nritems > 1 && i < nritems - 2; i++) {
+               btrfs_node_key(buf, &key, i);
+               btrfs_node_key_to_cpu(buf, &cpukey, i + 1);
+               if (btrfs_comp_keys(&key, &cpukey) >= 0)
+                       goto fail;
        }
        return 0;
+fail:
+       if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) {
+               if (parent_key)
+                       btrfs_disk_key_to_cpu(&cpukey, parent_key);
+               else
+                       btrfs_node_key_to_cpu(buf, &cpukey, 0);
+               btrfs_add_corrupt_extent_record(root->fs_info, &cpukey,
+                                               buf->start, buf->len,
+                                               btrfs_header_level(buf));
+       }
+       return -EIO;
 }
 
-static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
-                     int level)
+int btrfs_check_leaf(struct btrfs_root *root,
+                     struct btrfs_disk_key *parent_key,
+                     struct extent_buffer *buf)
 {
-       struct extent_buffer *leaf = path->nodes[level];
-       struct extent_buffer *parent = NULL;
-       int parent_slot;
+       int i;
        struct btrfs_key cpukey;
-       struct btrfs_disk_key parent_key;
-       struct btrfs_disk_key leaf_key;
-       int slot = path->slots[0];
-
-       u32 nritems = btrfs_header_nritems(leaf);
+       struct btrfs_disk_key key;
+       u32 nritems = btrfs_header_nritems(buf);
 
-       if (path->nodes[level + 1])
-               parent = path->nodes[level + 1];
+       if (btrfs_header_level(buf) != 0) {
+               fprintf(stderr, "leaf is not a leaf %llu\n",
+                      (unsigned long long)btrfs_header_bytenr(buf));
+               goto fail;
+       }
+       if (btrfs_leaf_free_space(root, buf) < 0) {
+               fprintf(stderr, "leaf free space incorrect %llu %d\n",
+                       (unsigned long long)btrfs_header_bytenr(buf),
+                       btrfs_leaf_free_space(root, buf));
+               goto fail;
+       }
 
        if (nritems == 0)
                return 0;
 
-       if (parent) {
-               parent_slot = path->slots[level + 1];
-               btrfs_node_key(parent, &parent_key, parent_slot);
-               btrfs_item_key(leaf, &leaf_key, 0);
-
-               BUG_ON(memcmp(&parent_key, &leaf_key,
-                      sizeof(struct btrfs_disk_key)));
-               BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
-                      btrfs_header_bytenr(leaf));
+       btrfs_item_key(buf, &key, 0);
+       if (parent_key && parent_key->type &&
+           memcmp(parent_key, &key, sizeof(key))) {
+               fprintf(stderr, "leaf parent key incorrect %llu\n",
+                      (unsigned long long)btrfs_header_bytenr(buf));
+               goto fail;
        }
-#if 0
        for (i = 0; nritems > 1 && i < nritems - 2; i++) {
-               btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
-               btrfs_item_key(leaf, &leaf_key, i);
-               if (comp_keys(&leaf_key, &cpukey) >= 0) {
-                       btrfs_print_leaf(root, leaf);
-                       printk("slot %d offset bad key\n", i);
-                       BUG_ON(1);
-               }
-               if (btrfs_item_offset_nr(leaf, i) !=
-                       btrfs_item_end_nr(leaf, i + 1)) {
-                       btrfs_print_leaf(root, leaf);
-                       printk("slot %d offset bad\n", i);
-                       BUG_ON(1);
-               }
-               if (i == 0) {
-                       if (btrfs_item_offset_nr(leaf, i) +
-                              btrfs_item_size_nr(leaf, i) !=
-                              BTRFS_LEAF_DATA_SIZE(root)) {
-                               btrfs_print_leaf(root, leaf);
-                               printk("slot %d first offset bad\n", i);
-                               BUG_ON(1);
-                       }
-               }
-       }
-       if (nritems > 0) {
-               if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
-                               btrfs_print_leaf(root, leaf);
-                               printk("slot %d bad size \n", nritems - 1);
-                               BUG_ON(1);
-               }
-       }
-#endif
-       if (slot != 0 && slot < nritems - 1) {
-               btrfs_item_key(leaf, &leaf_key, slot);
-               btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
-               if (btrfs_comp_keys(&leaf_key, &cpukey) <= 0) {
-                       btrfs_print_leaf(root, leaf);
-                       printk("slot %d offset bad key\n", slot);
-                       BUG_ON(1);
+               btrfs_item_key(buf, &key, i);
+               btrfs_item_key_to_cpu(buf, &cpukey, i + 1);
+               if (btrfs_comp_keys(&key, &cpukey) >= 0) {
+                       fprintf(stderr, "bad key ordering %d %d\n", i, i+1);
+                       goto fail;
                }
-               if (btrfs_item_offset_nr(leaf, slot - 1) !=
-                      btrfs_item_end_nr(leaf, slot)) {
-                       btrfs_print_leaf(root, leaf);
-                       printk("slot %d offset bad\n", slot);
-                       BUG_ON(1);
+               if (btrfs_item_offset_nr(buf, i) !=
+                       btrfs_item_end_nr(buf, i + 1)) {
+                       fprintf(stderr, "incorrect offsets %u %u\n",
+                               btrfs_item_offset_nr(buf, i),
+                               btrfs_item_end_nr(buf, i + 1));
+                       goto fail;
                }
-       }
-       if (slot < nritems - 1) {
-               btrfs_item_key(leaf, &leaf_key, slot);
-               btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
-               BUG_ON(btrfs_comp_keys(&leaf_key, &cpukey) >= 0);
-               if (btrfs_item_offset_nr(leaf, slot) !=
-                       btrfs_item_end_nr(leaf, slot + 1)) {
-                       btrfs_print_leaf(root, leaf);
-                       printk("slot %d offset bad\n", slot);
-                       BUG_ON(1);
+               if (i == 0 && btrfs_item_end_nr(buf, i) !=
+                   BTRFS_LEAF_DATA_SIZE(root)) {
+                       fprintf(stderr, "bad item end %u wanted %u\n",
+                               btrfs_item_end_nr(buf, i),
+                               (unsigned)BTRFS_LEAF_DATA_SIZE(root));
+                       goto fail;
                }
        }
-       BUG_ON(btrfs_item_offset_nr(leaf, 0) +
-              btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
        return 0;
+fail:
+       if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) {
+               if (parent_key)
+                       btrfs_disk_key_to_cpu(&cpukey, parent_key);
+               else
+                       btrfs_item_key_to_cpu(buf, &cpukey, 0);
+
+               btrfs_add_corrupt_extent_record(root->fs_info, &cpukey,
+                                               buf->start, buf->len, 0);
+       }
+       return -EIO;
 }
 
 static int noinline check_block(struct btrfs_root *root,
                                struct btrfs_path *path, int level)
 {
-       return 0;
-#if 0
-       struct extent_buffer *buf = path->nodes[level];
+       struct btrfs_disk_key key;
+       struct btrfs_disk_key *key_ptr = NULL;
+       struct extent_buffer *parent;
 
-       if (memcmp_extent_buffer(buf, root->fs_info->fsid,
-                                (unsigned long)btrfs_header_fsid(buf),
-                                BTRFS_FSID_SIZE)) {
-               printk("warning bad block %Lu\n", buf->start);
-               return 1;
+       if (path->nodes[level + 1]) {
+               parent = path->nodes[level + 1];
+               btrfs_node_key(parent, &key, path->slots[level + 1]);
+               key_ptr = &key;
        }
-#endif
        if (level == 0)
-               return check_leaf(root, path, level);
-       return check_node(root, path, level);
+               return btrfs_check_leaf(root, key_ptr, path->nodes[0]);
+       return btrfs_check_node(root, key_ptr, path->nodes[level]);
 }
 
 /*
@@ -630,7 +775,7 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
        return -1;
 }
 
-static struct extent_buffer *read_node_slot(struct btrfs_root *root,
+struct extent_buffer *read_node_slot(struct btrfs_root *root,
                                   struct extent_buffer *parent, int slot)
 {
        int level = btrfs_header_level(parent);
@@ -658,7 +803,6 @@ static int balance_level(struct btrfs_trans_handle *trans,
        int wret;
        int pslot;
        int orig_slot = path->slots[level];
-       int err_on_enospc = 0;
        u64 orig_ptr;
 
        if (level == 0)
@@ -690,23 +834,16 @@ static int balance_level(struct btrfs_trans_handle *trans,
                BUG_ON(ret);
 
                root->node = child;
-
-               ret = btrfs_update_extent_ref(trans, root, child->start,
-                                             mid->start, child->start,
-                                             root->root_key.objectid,
-                                             trans->transid,
-                                             level - 1, 0);
-               BUG_ON(ret);
-
                add_root_to_dirty_list(root);
                path->nodes[level] = NULL;
                clean_tree_block(trans, root, mid);
                wait_on_tree_block_writeback(root, mid);
                /* once for the path */
                free_extent_buffer(mid);
+
                ret = btrfs_free_extent(trans, root, mid->start, mid->len,
-                                       mid->start, root->root_key.objectid,
-                                       btrfs_header_generation(mid), 0, 0, 1);
+                                       0, root->root_key.objectid,
+                                       level, 1);
                /* once for the root ptr */
                free_extent_buffer(mid);
                return ret;
@@ -715,9 +852,6 @@ static int balance_level(struct btrfs_trans_handle *trans,
            BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
                return 0;
 
-       if (btrfs_header_nritems(mid) < 2)
-               err_on_enospc = 1;
-
        left = read_node_slot(root, parent, pslot - 1);
        if (left) {
                wret = btrfs_cow_block(trans, root, left,
@@ -743,8 +877,6 @@ static int balance_level(struct btrfs_trans_handle *trans,
                wret = push_node_left(trans, root, left, mid, 1);
                if (wret < 0)
                        ret = wret;
-               if (btrfs_header_nritems(mid) < 2)
-                       err_on_enospc = 1;
        }
 
        /*
@@ -756,21 +888,20 @@ static int balance_level(struct btrfs_trans_handle *trans,
                        ret = wret;
                if (btrfs_header_nritems(right) == 0) {
                        u64 bytenr = right->start;
-                       u64 generation = btrfs_header_generation(parent);
                        u32 blocksize = right->len;
 
                        clean_tree_block(trans, root, right);
                        wait_on_tree_block_writeback(root, right);
                        free_extent_buffer(right);
                        right = NULL;
-                       wret = del_ptr(trans, root, path, level + 1, pslot +
-                                      1);
+                       wret = btrfs_del_ptr(trans, root, path,
+                                            level + 1, pslot + 1);
                        if (wret)
                                ret = wret;
                        wret = btrfs_free_extent(trans, root, bytenr,
-                                                blocksize, parent->start,
-                                                btrfs_header_owner(parent),
-                                                generation, 0, 0, 1);
+                                                blocksize, 0,
+                                                root->root_key.objectid,
+                                                level, 0);
                        if (wret)
                                ret = wret;
                } else {
@@ -805,20 +936,18 @@ static int balance_level(struct btrfs_trans_handle *trans,
        }
        if (btrfs_header_nritems(mid) == 0) {
                /* we've managed to empty the middle node, drop it */
-               u64 root_gen = btrfs_header_generation(parent);
                u64 bytenr = mid->start;
                u32 blocksize = mid->len;
                clean_tree_block(trans, root, mid);
                wait_on_tree_block_writeback(root, mid);
                free_extent_buffer(mid);
                mid = NULL;
-               wret = del_ptr(trans, root, path, level + 1, pslot);
+               wret = btrfs_del_ptr(trans, root, path, level + 1, pslot);
                if (wret)
                        ret = wret;
                wret = btrfs_free_extent(trans, root, bytenr, blocksize,
-                                        parent->start,
-                                        btrfs_header_owner(parent),
-                                        root_gen, 0, 0, 1);
+                                        0, root->root_key.objectid,
+                                        level, 0);
                if (wret)
                        ret = wret;
        } else {
@@ -869,14 +998,12 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
        int wret;
        int pslot;
        int orig_slot = path->slots[level];
-       u64 orig_ptr;
 
        if (level == 0)
                return 1;
 
        mid = path->nodes[level];
        WARN_ON(btrfs_header_generation(mid) != trans->transid);
-       orig_ptr = btrfs_node_blockptr(mid, orig_slot);
 
        if (level < BTRFS_MAX_LEVEL - 1)
                parent = path->nodes[level + 1];
@@ -975,7 +1102,7 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
 /*
  * readahead one full node of leaves
  */
-static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
+void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
                             int level, int slot, u64 objectid)
 {
        struct extent_buffer *node;
@@ -1072,7 +1199,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
        u8 lowest_level = 0;
 
        lowest_level = p->lowest_level;
-       WARN_ON(lowest_level && ins_len);
+       WARN_ON(lowest_level && ins_len > 0);
        WARN_ON(p->nodes[0] != NULL);
        /*
        WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
@@ -1106,7 +1233,8 @@ again:
                        if (ret && slot > 0)
                                slot -= 1;
                        p->slots[level] = slot;
-                       if (ins_len > 0 && btrfs_header_nritems(b) >=
+                       if ((p->search_for_split || ins_len > 0) &&
+                           btrfs_header_nritems(b) >=
                            BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
                                int sret = split_node(trans, root, p, level);
                                BUG_ON(sret > 0);
@@ -1136,10 +1264,12 @@ again:
                                                 key->objectid);
 
                        b = read_node_slot(root, b, slot);
+                       if (!extent_buffer_uptodate(b))
+                               return -EIO;
                } else {
                        p->slots[level] = slot;
-                       if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
-                           sizeof(struct btrfs_item) + ins_len) {
+                       if (ins_len > 0 &&
+                           ins_len > btrfs_leaf_free_space(root, b)) {
                                int sret = split_leaf(trans, root, key,
                                                      p, ins_len, ret == 0);
                                BUG_ON(sret > 0);
@@ -1278,8 +1408,6 @@ static int push_node_left(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(src);
        btrfs_mark_buffer_dirty(dst);
 
-       ret = btrfs_update_ref(trans, root, src, dst, dst_nritems, push_items);
-       BUG_ON(ret);
        return ret;
 }
 
@@ -1342,8 +1470,6 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(src);
        btrfs_mark_buffer_dirty(dst);
 
-       ret = btrfs_update_ref(trans, root, src, dst, 0, push_items);
-       BUG_ON(ret);
        return ret;
 }
 
@@ -1363,7 +1489,6 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans,
        struct extent_buffer *c;
        struct extent_buffer *old;
        struct btrfs_disk_key lower_key;
-       int ret;
 
        BUG_ON(path->nodes[level]);
        BUG_ON(path->nodes[level-1] != root->node);
@@ -1374,18 +1499,19 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans,
        else
                btrfs_node_key(lower, &lower_key, 0);
 
-       c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
-                                  root->root_key.objectid,
-                                  trans->transid, level,
-                                  root->node->start, 0);
+       c = btrfs_alloc_free_block(trans, root, root->nodesize,
+                                  root->root_key.objectid, &lower_key, 
+                                  level, root->node->start, 0);
+
        if (IS_ERR(c))
                return PTR_ERR(c);
 
-       memset_extent_buffer(c, 0, 0, root->nodesize);
+       memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
        btrfs_set_header_nritems(c, 1);
        btrfs_set_header_level(c, level);
        btrfs_set_header_bytenr(c, c->start);
        btrfs_set_header_generation(c, trans->transid);
+       btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
        btrfs_set_header_owner(c, root->root_key.objectid);
 
        write_extent_buffer(c, root->fs_info->fsid,
@@ -1408,12 +1534,6 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans,
        old = root->node;
        root->node = c;
 
-       ret = btrfs_update_extent_ref(trans, root, lower->start,
-                                     lower->start, c->start,
-                                     root->root_key.objectid,
-                                     trans->transid, level - 1, 0);
-       BUG_ON(ret);
-
        /* the super has an extra ref to root->node */
        free_extent_buffer(old);
 
@@ -1500,21 +1620,21 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
        }
 
        c_nritems = btrfs_header_nritems(c);
+       mid = (c_nritems + 1) / 2;
+       btrfs_node_key(c, &disk_key, mid);
 
-       btrfs_node_key(c, &disk_key, 0);
        split = btrfs_alloc_free_block(trans, root, root->nodesize,
-                                        path->nodes[level + 1]->start,
-                                        root->root_key.objectid,
-                                        trans->transid, level, c->start, 0);
+                                       root->root_key.objectid,
+                                       &disk_key, level, c->start, 0);
        if (IS_ERR(split))
                return PTR_ERR(split);
 
-       btrfs_set_header_flags(split, btrfs_header_flags(c));
+       memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
        btrfs_set_header_level(split, btrfs_header_level(c));
        btrfs_set_header_bytenr(split, split->start);
        btrfs_set_header_generation(split, trans->transid);
+       btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
        btrfs_set_header_owner(split, root->root_key.objectid);
-       btrfs_set_header_flags(split, 0);
        write_extent_buffer(split, root->fs_info->fsid,
                            (unsigned long)btrfs_header_fsid(split),
                            BTRFS_FSID_SIZE);
@@ -1522,7 +1642,6 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
                            (unsigned long)btrfs_header_chunk_tree_uuid(split),
                            BTRFS_UUID_SIZE);
 
-       mid = (c_nritems + 1) / 2;
 
        copy_extent_buffer(split, c,
                           btrfs_node_key_ptr_offset(0),
@@ -1535,16 +1654,12 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
        btrfs_mark_buffer_dirty(c);
        btrfs_mark_buffer_dirty(split);
 
-       btrfs_node_key(split, &disk_key, 0);
        wret = insert_ptr(trans, root, path, &disk_key, split->start,
                          path->slots[level + 1] + 1,
                          level + 1);
        if (wret)
                ret = wret;
 
-       ret = btrfs_update_ref(trans, root, c, split, 0, c_nritems - mid);
-       BUG_ON(ret);
-
        if (path->slots[level] >= mid) {
                path->slots[level] -= mid;
                free_extent_buffer(c);
@@ -1632,7 +1747,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
 
        right = read_node_slot(root, upper, slot + 1);
        free_space = btrfs_leaf_free_space(root, right);
-       if (free_space < data_size + sizeof(struct btrfs_item)) {
+       if (free_space < data_size) {
                free_extent_buffer(right);
                return 1;
        }
@@ -1645,7 +1760,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
                return 1;
        }
        free_space = btrfs_leaf_free_space(root, right);
-       if (free_space < data_size + sizeof(struct btrfs_item)) {
+       if (free_space < data_size) {
                free_extent_buffer(right);
                return 1;
        }
@@ -1735,9 +1850,6 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
        btrfs_set_node_key(upper, &disk_key, slot + 1);
        btrfs_mark_buffer_dirty(upper);
 
-       ret = btrfs_update_ref(trans, root, left, right, 0, push_items);
-       BUG_ON(ret);
-
        /* then fixup the leaf pointer in the path */
        if (path->slots[0] >= left_nritems) {
                path->slots[0] -= left_nritems;
@@ -1787,7 +1899,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
 
        left = read_node_slot(root, path->nodes[1], slot - 1);
        free_space = btrfs_leaf_free_space(root, left);
-       if (free_space < data_size + sizeof(struct btrfs_item)) {
+       if (free_space < data_size) {
                free_extent_buffer(left);
                return 1;
        }
@@ -1802,7 +1914,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
        }
 
        free_space = btrfs_leaf_free_space(root, left);
-       if (free_space < data_size + sizeof(struct btrfs_item)) {
+       if (free_space < data_size) {
                free_extent_buffer(left);
                return 1;
        }
@@ -1898,10 +2010,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
        if (wret)
                ret = wret;
 
-       ret = btrfs_update_ref(trans, root, right, left,
-                              old_left_nritems, push_items);
-       BUG_ON(ret);
-
        /* then fixup the leaf pointer in the path */
        if (path->slots[0] < push_items) {
                path->slots[0] += old_left_nritems;
@@ -1922,34 +2030,96 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
  *
  * returns 0 if all went well and < 0 on failure.
  */
-static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
-                     *root, struct btrfs_key *ins_key,
-                     struct btrfs_path *path, int data_size, int extend)
+static noinline int copy_for_split(struct btrfs_trans_handle *trans,
+                              struct btrfs_root *root,
+                              struct btrfs_path *path,
+                              struct extent_buffer *l,
+                              struct extent_buffer *right,
+                              int slot, int mid, int nritems)
 {
+       int data_copy_size;
+       int rt_data_off;
+       int i;
+       int ret = 0;
+       int wret;
+       struct btrfs_disk_key disk_key;
+       /* move items [mid, nritems) of leaf 'l' into the new leaf 'right' */
+       nritems = nritems - mid;        /* from here on: items moved */
+       btrfs_set_header_nritems(right, nritems);
+       data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
+       /* item headers mid.. of 'l' become item headers 0.. of 'right' */
+       copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
+                          btrfs_item_nr_offset(mid),
+                          nritems * sizeof(struct btrfs_item));
+       /* the item data is placed at the tail end of right's data area */
+       copy_extent_buffer(right, l,
+                    btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
+                    data_copy_size, btrfs_leaf_data(l) +
+                    leaf_data_end(root, l), data_copy_size);
+
+       rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
+                     btrfs_item_end_nr(l, mid);
+       /* shift each copied item's data offset by the same distance */
+       for (i = 0; i < nritems; i++) {
+               struct btrfs_item *item = btrfs_item_nr(right, i);
+               u32 ioff = btrfs_item_offset(right, item);
+               btrfs_set_item_offset(right, item, ioff + rt_data_off);
+       }
+
+       btrfs_set_header_nritems(l, mid);       /* 'l' keeps items [0, mid) */
+       ret = 0;
+       btrfs_item_key(right, &disk_key, 0);
+       wret = insert_ptr(trans, root, path, &disk_key, right->start,
+                         path->slots[1] + 1, 1);
+       if (wret)
+               ret = wret;     /* reported to the caller; we still finish */
+
+       btrfs_mark_buffer_dirty(right);
+       btrfs_mark_buffer_dirty(l);
+       BUG_ON(path->slots[0] != slot);
+       /* if the path's item moved to 'right', point the path at it */
+       if (mid <= slot) {
+               free_extent_buffer(path->nodes[0]);
+               path->nodes[0] = right;
+               path->slots[0] -= mid;
+               path->slots[1] += 1;
+       } else {
+               free_extent_buffer(right);      /* path stays on 'l' */
+       }
+
+       BUG_ON(path->slots[0] < 0);
+
+       return ret;
+}
+
+/*
+ * split the path's leaf in two, making sure there is at least data_size
+ * available for the resulting leaf level of the path.
+ *
+ * returns 0 if all went well and < 0 on failure.
+ */
+static noinline int split_leaf(struct btrfs_trans_handle *trans,
+                              struct btrfs_root *root,
+                              struct btrfs_key *ins_key,
+                              struct btrfs_path *path, int data_size,
+                              int extend)
+{
+       struct btrfs_disk_key disk_key;
        struct extent_buffer *l;
        u32 nritems;
        int mid;
        int slot;
        struct extent_buffer *right;
-       int space_needed = data_size + sizeof(struct btrfs_item);
-       int data_copy_size;
-       int rt_data_off;
-       int i;
        int ret = 0;
        int wret;
-       int double_split;
+       int split;
        int num_doubles = 0;
-       struct btrfs_disk_key disk_key;
-
-       if (extend)
-               space_needed = data_size;
 
        /* first try to make some room by pushing left and right */
-       if (ins_key->type != BTRFS_DIR_ITEM_KEY) {
+       if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
                wret = push_leaf_right(trans, root, path, data_size, 0);
-               if (wret < 0) {
+               if (wret < 0)
                        return wret;
-               }
                if (wret) {
                        wret = push_leaf_left(trans, root, path, data_size, 0);
                        if (wret < 0)
@@ -1958,7 +2128,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
                l = path->nodes[0];
 
                /* did the pushes work? */
-               if (btrfs_leaf_free_space(root, l) >= space_needed)
+               if (btrfs_leaf_free_space(root, l) >= data_size)
                        return 0;
        }
 
@@ -1968,16 +2138,53 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
                        return ret;
        }
 again:
-       double_split = 0;
+       split = 1;
        l = path->nodes[0];
        slot = path->slots[0];
        nritems = btrfs_header_nritems(l);
-       mid = (nritems + 1)/ 2;
+       mid = (nritems + 1) / 2;
+
+       if (mid <= slot) {
+               if (nritems == 1 ||
+                   leaf_space_used(l, mid, nritems - mid) + data_size >
+                       BTRFS_LEAF_DATA_SIZE(root)) {
+                       if (slot >= nritems) {
+                               split = 0;
+                       } else {
+                               mid = slot;
+                               if (mid != nritems &&
+                                   leaf_space_used(l, mid, nritems - mid) +
+                                   data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+                                       split = 2;
+                               }
+                       }
+               }
+       } else {
+               if (leaf_space_used(l, 0, mid) + data_size >
+                       BTRFS_LEAF_DATA_SIZE(root)) {
+                       if (!extend && data_size && slot == 0) {
+                               split = 0;
+                       } else if ((extend || !data_size) && slot == 0) {
+                               mid = 1;
+                       } else {
+                               mid = slot;
+                               if (mid != nritems &&
+                                   leaf_space_used(l, mid, nritems - mid) +
+                                   data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+                                       split = 2 ;
+                               }
+                       }
+               }
+       }
+       
+       if (split == 0)
+               btrfs_cpu_key_to_disk(&disk_key, ins_key);
+       else
+               btrfs_item_key(l, &disk_key, mid);
 
        right = btrfs_alloc_free_block(trans, root, root->leafsize,
-                                       path->nodes[1]->start,
                                        root->root_key.objectid,
-                                       trans->transid, 0, l->start, 0);
+                                       &disk_key, 0, l->start, 0);
        if (IS_ERR(right)) {
                BUG_ON(1);
                return PTR_ERR(right);
@@ -1986,6 +2193,7 @@ again:
        memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
        btrfs_set_header_bytenr(right, right->start);
        btrfs_set_header_generation(right, trans->transid);
+       btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
        btrfs_set_header_owner(right, root->root_key.objectid);
        btrfs_set_header_level(right, 0);
        write_extent_buffer(right, root->fs_info->fsid,
@@ -1995,117 +2203,165 @@ again:
        write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
                            (unsigned long)btrfs_header_chunk_tree_uuid(right),
                            BTRFS_UUID_SIZE);
-       if (mid <= slot) {
-               if (nritems == 1 ||
-                   leaf_space_used(l, mid, nritems - mid) + space_needed >
-                       BTRFS_LEAF_DATA_SIZE(root)) {
-                       if (slot >= nritems) {
-                               btrfs_cpu_key_to_disk(&disk_key, ins_key);
-                               btrfs_set_header_nritems(right, 0);
-                               wret = insert_ptr(trans, root, path,
-                                                 &disk_key, right->start,
-                                                 path->slots[1] + 1, 1);
-                               if (wret)
-                                       ret = wret;
-                               free_extent_buffer(path->nodes[0]);
-                               path->nodes[0] = right;
-                               path->slots[0] = 0;
-                               path->slots[1] += 1;
-                               return ret;
-                       }
-                       mid = slot;
-                       if (mid != nritems &&
-                           leaf_space_used(l, mid, nritems - mid) +
-                           space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
-                               double_split = 1;
-                       }
-               }
-       } else {
-               if (leaf_space_used(l, 0, mid + 1) + space_needed >
-                       BTRFS_LEAF_DATA_SIZE(root)) {
-                       if (!extend && slot == 0) {
-                               btrfs_cpu_key_to_disk(&disk_key, ins_key);
-                               btrfs_set_header_nritems(right, 0);
-                               wret = insert_ptr(trans, root, path,
-                                                 &disk_key,
-                                                 right->start,
-                                                 path->slots[1], 1);
+
+       if (split == 0) {
+               if (mid <= slot) {
+                       btrfs_set_header_nritems(right, 0);
+                       wret = insert_ptr(trans, root, path,
+                                         &disk_key, right->start,
+                                         path->slots[1] + 1, 1);
+                       if (wret)
+                               ret = wret;
+
+                       free_extent_buffer(path->nodes[0]);
+                       path->nodes[0] = right;
+                       path->slots[0] = 0;
+                       path->slots[1] += 1;
+               } else {
+                       btrfs_set_header_nritems(right, 0);
+                       wret = insert_ptr(trans, root, path,
+                                         &disk_key,
+                                         right->start,
+                                         path->slots[1], 1);
+                       if (wret)
+                               ret = wret;
+                       free_extent_buffer(path->nodes[0]);
+                       path->nodes[0] = right;
+                       path->slots[0] = 0;
+                       if (path->slots[1] == 0) {
+                               wret = fixup_low_keys(trans, root,
+                                               path, &disk_key, 1);
                                if (wret)
                                        ret = wret;
-                               free_extent_buffer(path->nodes[0]);
-                               path->nodes[0] = right;
-                               path->slots[0] = 0;
-                               if (path->slots[1] == 0) {
-                                       wret = fixup_low_keys(trans, root,
-                                                  path, &disk_key, 1);
-                                       if (wret)
-                                               ret = wret;
-                               }
-                               return ret;
-                       } else if (extend && slot == 0) {
-                               mid = 1;
-                       } else {
-                               mid = slot;
-                               if (mid != nritems &&
-                                   leaf_space_used(l, mid, nritems - mid) +
-                                   space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
-                                       double_split = 1;
-                               }
                        }
                }
+               btrfs_mark_buffer_dirty(right);
+               return ret;
        }
-       nritems = nritems - mid;
-       btrfs_set_header_nritems(right, nritems);
-       data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
 
-       copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
-                          btrfs_item_nr_offset(mid),
-                          nritems * sizeof(struct btrfs_item));
+       ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
+       BUG_ON(ret);
 
-       copy_extent_buffer(right, l,
-                    btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
-                    data_copy_size, btrfs_leaf_data(l) +
-                    leaf_data_end(root, l), data_copy_size);
+       if (split == 2) {
+               BUG_ON(num_doubles != 0);
+               num_doubles++;
+               goto again;
+       }
 
-       rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
-                     btrfs_item_end_nr(l, mid);
+       return ret;
+}
 
-       for (i = 0; i < nritems; i++) {
-               struct btrfs_item *item = btrfs_item_nr(right, i);
-               u32 ioff = btrfs_item_offset(right, item);
-               btrfs_set_item_offset(right, item, ioff + rt_data_off);
-       }
+/*
+ * Split the item at path->slots[0] into two items: the old item keeps
+ * the first split_offset bytes, and a new item keyed by 'new_key' is
+ * inserted in the next slot of the same leaf with the remaining bytes.
+ *
+ * The path may be released and re-searched by this operation.  After
+ * the split, the path is pointing to the old item.
+ *
+ * Note, the item being split must be small enough to live alone on
+ * a tree block with room for one extra struct btrfs_item.
+ *
+ * Returns 0 on success, -EAGAIN if the item could not be found again or
+ * changed size after the path was re-searched, and -ENOMEM if the
+ * temporary copy of the item data cannot be allocated.
+ */
+int btrfs_split_item(struct btrfs_trans_handle *trans,
+                    struct btrfs_root *root,
+                    struct btrfs_path *path,
+                    struct btrfs_key *new_key,
+                    unsigned long split_offset)
+{
+       u32 item_size;
+       struct extent_buffer *leaf;
+       struct btrfs_key orig_key;
+       struct btrfs_item *item;
+       struct btrfs_item *new_item;
+       int ret = 0;
+       int slot;
+       u32 nritems;
+       u32 orig_offset;
+       struct btrfs_disk_key disk_key;
+       char *buf;
 
-       btrfs_set_header_nritems(l, mid);
-       ret = 0;
-       btrfs_item_key(right, &disk_key, 0);
-       wret = insert_ptr(trans, root, path, &disk_key, right->start,
-                         path->slots[1] + 1, 1);
-       if (wret)
-               ret = wret;
+       leaf = path->nodes[0];
+       btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]);
+       if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item))
+               goto split;     /* room for one more item header already */
 
-       btrfs_mark_buffer_dirty(right);
-       btrfs_mark_buffer_dirty(l);
-       BUG_ON(path->slots[0] != slot);
+       item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+       btrfs_release_path(root, path);
 
-       ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
+       path->search_for_split = 1;
+
+       ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1);
+       path->search_for_split = 0;
+
+       /* if our item isn't there or got smaller, return now */
+       if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0],
+                                                       path->slots[0])) {
+               return -EAGAIN;
+       }
+
+       ret = split_leaf(trans, root, &orig_key, path, 0, 0);
        BUG_ON(ret);
 
-       if (mid <= slot) {
-               free_extent_buffer(path->nodes[0]);
-               path->nodes[0] = right;
-               path->slots[0] -= mid;
-               path->slots[1] += 1;
-       } else
-               free_extent_buffer(right);
+       leaf = path->nodes[0];  /* old pointer is stale after the re-search */
+       BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
 
-       BUG_ON(path->slots[0] < 0);
+split:
+       item = btrfs_item_nr(leaf, path->slots[0]);
+       orig_offset = btrfs_item_offset(leaf, item);
+       item_size = btrfs_item_size(leaf, item);
 
-       if (double_split) {
-               BUG_ON(num_doubles != 0);
-               num_doubles++;
-               goto again;
+       buf = kmalloc(item_size, GFP_NOFS);
+       if (!buf)
+               return -ENOMEM;
+       read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
+                           path->slots[0]), item_size);
+       slot = path->slots[0] + 1;
+       leaf = path->nodes[0];
+
+       nritems = btrfs_header_nritems(leaf);
+
+       if (slot != nritems) {
+               /* shift the items */
+               memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
+                             btrfs_item_nr_offset(slot),
+                             (nritems - slot) * sizeof(struct btrfs_item));
+       }
+
+       btrfs_cpu_key_to_disk(&disk_key, new_key);
+       btrfs_set_item_key(leaf, &disk_key, slot);
+
+       new_item = btrfs_item_nr(leaf, slot);
+
+       btrfs_set_item_offset(leaf, new_item, orig_offset);
+       btrfs_set_item_size(leaf, new_item, item_size - split_offset);
+
+       btrfs_set_item_offset(leaf, item,
+                             orig_offset + item_size - split_offset);
+       btrfs_set_item_size(leaf, item, split_offset);
+
+       btrfs_set_header_nritems(leaf, nritems + 1);
+
+       /* write the data for the start of the original item */
+       write_extent_buffer(leaf, buf,
+                           btrfs_item_ptr_offset(leaf, path->slots[0]),
+                           split_offset);
+
+       /* write the data for the new item */
+       write_extent_buffer(leaf, buf + split_offset,
+                           btrfs_item_ptr_offset(leaf, slot),
+                           item_size - split_offset);
+       btrfs_mark_buffer_dirty(leaf);
+
+       ret = 0;
+       if (btrfs_leaf_free_space(root, leaf) < 0) {
+               btrfs_print_leaf(root, leaf);
+               BUG();
        }
+       kfree(buf);
        return ret;
 }
 
@@ -2116,7 +2372,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
 {
        int ret = 0;
        int slot;
-       int slot_orig;
        struct extent_buffer *leaf;
        struct btrfs_item *item;
        u32 nritems;
@@ -2126,7 +2381,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
        unsigned int size_diff;
        int i;
 
-       slot_orig = path->slots[0];
        leaf = path->nodes[0];
        slot = path->slots[0];
 
@@ -2214,7 +2468,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
 {
        int ret = 0;
        int slot;
-       int slot_orig;
        struct extent_buffer *leaf;
        struct btrfs_item *item;
        u32 nritems;
@@ -2223,7 +2476,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
        unsigned int old_size;
        int i;
 
-       slot_orig = path->slots[0];
        leaf = path->nodes[0];
 
        nritems = btrfs_header_nritems(leaf);
@@ -2287,7 +2539,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
        struct btrfs_item *item;
        int ret = 0;
        int slot;
-       int slot_orig;
        int i;
        u32 nritems;
        u32 total_size = 0;
@@ -2303,7 +2554,7 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
        if (!root->node)
                BUG();
 
-       total_size = total_data + (nr - 1) * sizeof(struct btrfs_item);
+       total_size = total_data + nr * sizeof(struct btrfs_item);
        ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
        if (ret == 0) {
                return -EEXIST;
@@ -2311,14 +2562,12 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
        if (ret < 0)
                goto out;
 
-       slot_orig = path->slots[0];
        leaf = path->nodes[0];
 
        nritems = btrfs_header_nritems(leaf);
        data_end = leaf_data_end(root, leaf);
 
-       if (btrfs_leaf_free_space(root, leaf) <
-           sizeof(struct btrfs_item) + total_size) {
+       if (btrfs_leaf_free_space(root, leaf) < total_size) {
                btrfs_print_leaf(root, leaf);
                printk("not enough freespace need %u have %d\n",
                       total_size, btrfs_leaf_free_space(root, leaf));
@@ -2422,7 +2671,7 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
  * continuing all the way the root if required.  The root is converted into
  * a leaf if all the nodes are emptied.
  */
-static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                   struct btrfs_path *path, int level, int slot)
 {
        struct extent_buffer *parent = path->nodes[level];
@@ -2457,6 +2706,33 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 }
 
 /*
+ * a helper function to delete the leaf pointed to by path->slots[1] and
+ * path->nodes[1].
+ *
+ * This deletes the pointer in path->nodes[1] and frees the leaf
+ * block extent.  zero is returned if it all worked out, < 0 otherwise.
+ *
+ * The path must have already been setup for deleting the leaf, including
+ * all the proper balancing.  path->nodes[1] must be locked.
+ */
+static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
+                                  struct btrfs_root *root,
+                                  struct btrfs_path *path,
+                                  struct extent_buffer *leaf)
+{
+       int ret;
+       /* sanity check: leaf should carry this transaction's id */
+       WARN_ON(btrfs_header_generation(leaf) != trans->transid);
+       ret = btrfs_del_ptr(trans, root, path, 1, path->slots[1]);
+       if (ret)
+               return ret;     /* the extent is not freed in this case */
+       /* NOTE(review): args look like parent 0, owner, level 0 - confirm */
+       ret = btrfs_free_extent(trans, root, leaf->start, leaf->len,
+                               0, root->root_key.objectid, 0, 0);
+       return ret;
+}
+
+/*
  * delete the item at the leaf level in path.  If that empties
  * the leaf, remove it from the tree
  */
@@ -2510,17 +2786,11 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                if (leaf == root->node) {
                        btrfs_set_header_level(leaf, 0);
                } else {
-                       u64 root_gen = btrfs_header_generation(path->nodes[1]);
                        clean_tree_block(trans, root, leaf);
                        wait_on_tree_block_writeback(root, leaf);
-                       wret = del_ptr(trans, root, path, 1, path->slots[1]);
-                       if (wret)
-                               ret = wret;
-                       wret = btrfs_free_extent(trans, root,
-                                        leaf->start, leaf->len,
-                                        path->nodes[1]->start,
-                                        btrfs_header_owner(path->nodes[1]),
-                                        root_gen, 0, 0, 1);
+
+                       wret = btrfs_del_leaf(trans, root, path, leaf);
+                       BUG_ON(ret);
                        if (wret)
                                ret = wret;
                }
@@ -2557,27 +2827,14 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                        }
 
                        if (btrfs_header_nritems(leaf) == 0) {
-                               u64 root_gen;
-                               u64 bytenr = leaf->start;
-                               u32 blocksize = leaf->len;
-
-                               root_gen = btrfs_header_generation(
-                                                          path->nodes[1]);
-
                                clean_tree_block(trans, root, leaf);
                                wait_on_tree_block_writeback(root, leaf);
 
-                               wret = del_ptr(trans, root, path, 1, slot);
-                               if (wret)
-                                       ret = wret;
-
+                               path->slots[1] = slot;
+                               ret = btrfs_del_leaf(trans, root, path, leaf);
+                               BUG_ON(ret);
                                free_extent_buffer(leaf);
-                               wret = btrfs_free_extent(trans, root, bytenr,
-                                            blocksize, path->nodes[1]->start,
-                                            btrfs_header_owner(path->nodes[1]),
-                                            root_gen, 0, 0, 1);
-                               if (wret)
-                                       ret = wret;
+
                        } else {
                                btrfs_mark_buffer_dirty(leaf);
                                free_extent_buffer(leaf);
@@ -2670,6 +2927,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
                        reada_for_search(root, path, level, slot, 0);
 
                next = read_node_slot(root, c, slot);
+               if (!next)
+                       return -EIO;
                break;
        }
        path->slots[level] = slot;
@@ -2684,6 +2943,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
                if (path->reada)
                        reada_for_search(root, path, level, 0, 0);
                next = read_node_slot(root, next, 0);
+               if (!next)
+                       return -EIO;
        }
        return 0;
 }