Btrfs: Add btree locking to the tree defragmentation code
author Chris Mason <chris.mason@oracle.com>
Wed, 25 Jun 2008 20:01:31 +0000 (16:01 -0400)
committer Chris Mason <chris.mason@oracle.com>
Thu, 25 Sep 2008 15:04:03 +0000 (11:04 -0400)
The online btree defragger is simplified and rewritten to use
standard btree searches instead of a walk up / down mechanism.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/extent-tree.c
fs/btrfs/tree-defrag.c

index 40f0e0c..7f4cc2b 100644 (file)
@@ -354,7 +354,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
                       struct btrfs_key *progress)
 {
        struct extent_buffer *cur;
-       struct extent_buffer *tmp;
        u64 blocknr;
        u64 gen;
        u64 search_start = *last_ret;
@@ -370,9 +369,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
        int progress_passed = 0;
        struct btrfs_disk_key disk_key;
 
-       /* FIXME this code needs locking */
-       return 0;
-
        parent_level = btrfs_header_level(parent);
        if (cache_only && parent_level != 1)
                return 0;
@@ -454,20 +450,23 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
                if (search_start == 0)
                        search_start = last_block;
 
+               btrfs_tree_lock(cur);
                err = __btrfs_cow_block(trans, root, cur, parent, i,
-                                       &tmp, search_start,
+                                       &cur, search_start,
                                        min(16 * blocksize,
                                            (end_slot - i) * blocksize));
                if (err) {
+                       btrfs_tree_unlock(cur);
                        free_extent_buffer(cur);
                        break;
                }
-               search_start = tmp->start;
-               last_block = tmp->start;
+               search_start = cur->start;
+               last_block = cur->start;
                *last_ret = search_start;
                if (parent_level == 1)
-                       btrfs_clear_buffer_defrag(tmp);
-               free_extent_buffer(tmp);
+                       btrfs_clear_buffer_defrag(cur);
+               btrfs_tree_unlock(cur);
+               free_extent_buffer(cur);
        }
        if (parent->map_token) {
                unmap_extent_buffer(parent, parent->map_token,
@@ -2970,6 +2969,35 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
        return 1;
 }
 
+int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
+                       struct btrfs_key *key, int lowest_level)
+{      /* Find the key after the path's current slot, climbing from lowest_level; returns 0 with *key filled, 1 if none. root is not used here. */
+       int level = lowest_level;
+       int slot;
+       struct extent_buffer *c;
+
+       while(level < BTRFS_MAX_LEVEL) {
+               if (!path->nodes[level])        /* the path holds no node at this level: no next key */
+                       return 1;
+
+               slot = path->slots[level] + 1;  /* candidate is the slot right after the current one */
+               c = path->nodes[level];
+               if (slot >= btrfs_header_nritems(c)) {  /* current slot was the last in this node: retry one level up */
+                       level++;
+                       if (level == BTRFS_MAX_LEVEL) {
+                               return 1;
+                       }
+                       continue;
+               }
+               if (level == 0) /* level 0 reads the key via the item accessor, higher levels via the node accessor */
+                       btrfs_item_key_to_cpu(c, key, slot);
+               else
+                       btrfs_node_key_to_cpu(c, key, slot);
+               return 0;
+       }
+       return 1;
+}
+
 /*
  * search the tree again to find a leaf with greater keys
  * returns 0 if it found something or 1 if there are no greater leaves.
index 244fe86..ca8e6f1 100644 (file)
@@ -1411,6 +1411,8 @@ int btrfs_previous_item(struct btrfs_root *root,
 
 struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
 struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
+int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
+                       struct btrfs_key *key, int lowest_level);
 
 int btrfs_cow_block(struct btrfs_trans_handle *trans,
                    struct btrfs_root *root, struct extent_buffer *buf,
index 89cc4f6..a9b3a25 100644 (file)
@@ -2201,6 +2201,7 @@ int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len,
 {
        mutex_unlock(&root->fs_info->alloc_mutex);
        lookup_extent_ref(NULL, root, start, len, refs);
+       cond_resched();
        mutex_lock(&root->fs_info->alloc_mutex);
        return lookup_extent_ref(NULL, root, start, len, refs);
 }
@@ -2280,6 +2281,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
 
                        next = read_tree_block(root, bytenr, blocksize,
                                               ptr_gen);
+                       cond_resched();
                        mutex_lock(&root->fs_info->alloc_mutex);
 
                        /* we've dropped the lock, double check */
@@ -2329,6 +2331,7 @@ out:
        *level += 1;
        BUG_ON(ret);
        mutex_unlock(&root->fs_info->alloc_mutex);
+       cond_resched();
        return 0;
 }
 
@@ -2448,6 +2451,10 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
                        break;
                if (wret < 0)
                        ret = wret;
+               if (trans->transaction->in_commit) {
+                       ret = -EAGAIN;
+                       break;
+               }
        }
        for (i = 0; i <= orig_level; i++) {
                if (path->nodes[i]) {
index fab851d..1677e4e 100644 (file)
 #include "disk-io.h"
 #include "print-tree.h"
 #include "transaction.h"
-
-static void reada_defrag(struct btrfs_root *root,
-                        struct extent_buffer *node)
-{
-       int i;
-       u32 nritems;
-       u64 bytenr;
-       u64 gen;
-       u32 blocksize;
-       int ret;
-
-       blocksize = btrfs_level_size(root, btrfs_header_level(node) - 1);
-       nritems = btrfs_header_nritems(node);
-       for (i = 0; i < nritems; i++) {
-               bytenr = btrfs_node_blockptr(node, i);
-               gen = btrfs_node_ptr_generation(node, i);
-               ret = readahead_tree_block(root, bytenr, blocksize, gen);
-               if (ret)
-                       break;
-       }
-}
-
-static int defrag_walk_down(struct btrfs_trans_handle *trans,
-                           struct btrfs_root *root,
-                           struct btrfs_path *path, int *level,
-                           int cache_only, u64 *last_ret)
-{
-       struct extent_buffer *next;
-       struct extent_buffer *cur;
-       u64 bytenr;
-       u64 ptr_gen;
-       int ret = 0;
-       int is_extent = 0;
-
-       WARN_ON(*level < 0);
-       WARN_ON(*level >= BTRFS_MAX_LEVEL);
-
-       if (root->fs_info->extent_root == root)
-               is_extent = 1;
-
-       if (*level == 1 && cache_only && path->nodes[1] &&
-           !btrfs_buffer_defrag(path->nodes[1])) {
-               goto out;
-       }
-       while(*level > 0) {
-               WARN_ON(*level < 0);
-               WARN_ON(*level >= BTRFS_MAX_LEVEL);
-               cur = path->nodes[*level];
-
-               if (!cache_only && *level > 1 && path->slots[*level] == 0)
-                       reada_defrag(root, cur);
-
-               if (btrfs_header_level(cur) != *level)
-                       WARN_ON(1);
-
-               if (path->slots[*level] >=
-                   btrfs_header_nritems(cur))
-                       break;
-
-               if (*level == 1) {
-                       WARN_ON(btrfs_header_generation(path->nodes[*level]) !=
-                                                       trans->transid);
-                       ret = btrfs_realloc_node(trans, root,
-                                                path->nodes[*level],
-                                                path->slots[*level],
-                                                cache_only, last_ret,
-                                                &root->defrag_progress);
-                       if (is_extent)
-                               btrfs_extent_post_op(trans, root);
-
-                       break;
-               }
-               bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
-               ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
-
-               if (cache_only) {
-                       next = btrfs_find_tree_block(root, bytenr,
-                                          btrfs_level_size(root, *level - 1));
-                       if (!next || !btrfs_buffer_uptodate(next, ptr_gen) ||
-                           !btrfs_buffer_defrag(next)) {
-                               free_extent_buffer(next);
-                               path->slots[*level]++;
-                               continue;
-                       }
-               } else {
-                       next = read_tree_block(root, bytenr,
-                                      btrfs_level_size(root, *level - 1),
-                                      ptr_gen);
-               }
-               ret = btrfs_cow_block(trans, root, next, path->nodes[*level],
-                                     path->slots[*level], &next);
-               BUG_ON(ret);
-               if (is_extent)
-                       btrfs_extent_post_op(trans, root);
-
-               WARN_ON(*level <= 0);
-               if (path->nodes[*level-1])
-                       free_extent_buffer(path->nodes[*level-1]);
-               path->nodes[*level-1] = next;
-               *level = btrfs_header_level(next);
-               path->slots[*level] = 0;
-       }
-       WARN_ON(*level < 0);
-       WARN_ON(*level >= BTRFS_MAX_LEVEL);
-
-       btrfs_clear_buffer_defrag(path->nodes[*level]);
-out:
-       free_extent_buffer(path->nodes[*level]);
-       path->nodes[*level] = NULL;
-       *level += 1;
-       WARN_ON(ret && ret != -EAGAIN);
-       return ret;
-}
-
-static int defrag_walk_up(struct btrfs_trans_handle *trans,
-                         struct btrfs_root *root,
-                         struct btrfs_path *path, int *level,
-                         int cache_only)
-{
-       int i;
-       int slot;
-       struct extent_buffer *node;
-
-       for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
-               slot = path->slots[i];
-               if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
-                       path->slots[i]++;
-                       *level = i;
-                       node = path->nodes[i];
-                       WARN_ON(i == 0);
-                       btrfs_node_key_to_cpu(node, &root->defrag_progress,
-                                             path->slots[i]);
-                       root->defrag_level = i;
-                       return 0;
-               } else {
-                       btrfs_clear_buffer_defrag(path->nodes[*level]);
-                       free_extent_buffer(path->nodes[*level]);
-                       path->nodes[*level] = NULL;
-                       *level = i + 1;
-               }
-       }
-       return 1;
-}
+#include "locking.h"
 
 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
                        struct btrfs_root *root, int cache_only)
 {
        struct btrfs_path *path = NULL;
-       struct extent_buffer *tmp;
+       struct btrfs_key key;
        int ret = 0;
        int wret;
        int level;
        int orig_level;
        int i;
        int is_extent = 0;
+       int next_key_ret = 0;
        u64 last_ret = 0;
 
-       if (root->fs_info->extent_root == root)
+       if (root->fs_info->extent_root == root) {
+               mutex_lock(&root->fs_info->alloc_mutex);
                is_extent = 1;
-
-       goto out;
+       }
 
        if (root->ref_cows == 0 && !is_extent)
                goto out;
@@ -200,67 +59,63 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
                goto out;
        }
        if (root->defrag_progress.objectid == 0) {
+               struct extent_buffer *root_node;
                u32 nritems;
 
-               nritems = btrfs_header_nritems(root->node);
+               root_node = btrfs_lock_root_node(root);
+               nritems = btrfs_header_nritems(root_node);
                root->defrag_max.objectid = 0;
                /* from above we know this is not a leaf */
-               btrfs_node_key_to_cpu(root->node, &root->defrag_max,
+               btrfs_node_key_to_cpu(root_node, &root->defrag_max,
                                      nritems - 1);
-               extent_buffer_get(root->node);
-               ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp);
-               BUG_ON(ret);
-               path->nodes[level] = root->node;
-               path->slots[level] = 0;
-               if (is_extent)
-                       btrfs_extent_post_op(trans, root);
+               btrfs_tree_unlock(root_node);
+               free_extent_buffer(root_node);
+               memset(&key, 0, sizeof(key));
        } else {
-               level = root->defrag_level;
-               path->lowest_level = level;
-               wret = btrfs_search_slot(trans, root, &root->defrag_progress,
-                                        path, 0, 1);
-
-               if (is_extent)
-                       btrfs_extent_post_op(trans, root);
-
-               if (wret < 0) {
-                       ret = wret;
-                       goto out;
-               }
-
-               while(level > 0 && !path->nodes[level])
-                       level--;
-
-               if (!path->nodes[level]) {
-                       ret = 0;
-                       goto out;
-               }
+               memcpy(&key, &root->defrag_progress, sizeof(key));
        }
 
-       while(1) {
-               wret = defrag_walk_down(trans, root, path, &level, cache_only,
-                                       &last_ret);
-               if (wret > 0)
-                       break;
-               if (wret < 0)
-                       ret = wret;
+       path->lowest_level = 1;
+       path->keep_locks = 1;
+       wret = btrfs_search_slot(trans, root, &key, path, 0, 1);
 
-               wret = defrag_walk_up(trans, root, path, &level, cache_only);
-               if (wret > 0)
-                       break;
-               if (wret < 0)
-                       ret = wret;
-               else
-                       ret = -EAGAIN;
-               break;
+       if (wret < 0) {
+               ret = wret;
+               goto out;
+       }
+       if (!path->nodes[1]) {
+               ret = 0;
+               goto out;
+       }
+       path->slots[1] = btrfs_header_nritems(path->nodes[1]);
+       next_key_ret = btrfs_find_next_key(root, path, &key, 1);
+       ret = btrfs_realloc_node(trans, root,
+                                path->nodes[1], 0,
+                                cache_only, &last_ret,
+                                &root->defrag_progress);
+       WARN_ON(ret && ret != -EAGAIN);
+       if (next_key_ret == 0) {
+               memcpy(&root->defrag_progress, &key, sizeof(key));
+               ret = -EAGAIN;
        }
-       for (i = 0; i <= orig_level; i++) {
+
+       for (i = 1; i < BTRFS_MAX_LEVEL; i++) {
+               if (path->locks[i]) {
+                       btrfs_tree_unlock(path->nodes[i]);
+                       path->locks[i] = 0;
+               }
                if (path->nodes[i]) {
                        free_extent_buffer(path->nodes[i]);
                        path->nodes[i] = NULL;
                }
        }
+       if (is_extent)
+               btrfs_extent_post_op(trans, root);
+
 out:
+       if (is_extent)
+               mutex_unlock(&root->fs_info->alloc_mutex);
+
        if (path)
                btrfs_free_path(path);
        if (ret == -EAGAIN) {