faster btrfsck
authorChris Mason <chris.mason@oracle.com>
Mon, 23 Apr 2007 19:56:27 +0000 (15:56 -0400)
committerDavid Woodhouse <dwmw2@hera.kernel.org>
Mon, 23 Apr 2007 19:56:27 +0000 (15:56 -0400)
bit-radix.c
btrfsck.c
ctree.c
ctree.h
disk-io.c
disk-io.h

index cd5913a..7da97ce 100644 (file)
@@ -172,8 +172,8 @@ int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits,
        slot = start / BIT_RADIX_BITS_PER_ARRAY;
        ret = radix_tree_gang_lookup(radix, (void **)gang, slot,
                                     ARRAY_SIZE(gang));
+       found = start % BIT_RADIX_BITS_PER_ARRAY;
        for (i = 0; i < ret && nr > 0; i++) {
-               found = 0;
                bits = gang[i];
                while(nr > 0) {
                        found = find_next_bit(bits + 1,
@@ -189,6 +189,7 @@ int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits,
                        } else
                                break;
                }
+               found = 0;
        }
        return total_found;
 }
index 2f8630f..8cd2348 100644 (file)
--- a/btrfsck.c
+++ b/btrfsck.c
@@ -1,5 +1,7 @@
+#define _XOPEN_SOURCE 500
 #include <stdio.h>
 #include <stdlib.h>
+#include <fcntl.h>
 #include "kerncompat.h"
 #include "radix-tree.h"
 #include "ctree.h"
 
 u64 blocks_used = 0;
 struct extent_record {
+       struct btrfs_disk_key parent_key;
+       struct btrfs_disk_key node_key;
        u64 start;
        u64 nr;
        u64 owner;
        u32 refs;
+       u32 extent_tree_refs;
        u8 type;
 };
 
+static int check_node(struct btrfs_root *root,
+                     struct btrfs_disk_key *parent_key,
+                     struct btrfs_node *node)
+{
+       int i;
+       u32 nritems = btrfs_header_nritems(&node->header);
+
+       if (nritems == 0 || nritems > BTRFS_NODEPTRS_PER_BLOCK(root))
+               return 1;
+       if (parent_key->flags) {
+               if (memcmp(parent_key, &node->ptrs[0].key,
+                             sizeof(struct btrfs_disk_key)))
+                       return 1;
+       }
+       for (i = 0; nritems > 1 && i < nritems - 2; i++) {
+               struct btrfs_key cpukey;
+               btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[i + 1].key);
+               if (btrfs_comp_keys(&node->ptrs[i].key, &cpukey) >= 0)
+                       return 1;
+       }
+       return 0;
+}
+
+static int check_leaf(struct btrfs_root *root,
+                     struct btrfs_disk_key *parent_key,
+                     struct btrfs_leaf *leaf)
+{
+       int i;
+       u32 nritems = btrfs_header_nritems(&leaf->header);
+
+       if (btrfs_header_level(&leaf->header) != 0) {
+               fprintf(stderr, "leaf is not a leaf %Lu\n",
+                       btrfs_header_blocknr(&leaf->header));
+               return 1;
+       }
+       if (btrfs_leaf_free_space(root, leaf) < 0) {
+               fprintf(stderr, "leaf free space incorrect %Lu %d\n",
+                       btrfs_header_blocknr(&leaf->header),
+                       btrfs_leaf_free_space(root, leaf));
+               return 1;
+       }
+
+       if (nritems == 0)
+               return 0;
+
+       if (parent_key->flags) {
+               if (memcmp(parent_key, &leaf->items[0].key,
+                      sizeof(struct btrfs_disk_key))) {
+                       fprintf(stderr, "leaf parent key incorrect %Lu\n",
+                               btrfs_header_blocknr(&leaf->header));
+                       return 1;
+               }
+       }
+       for (i = 0; nritems > 1 && i < nritems - 2; i++) {
+               struct btrfs_key cpukey;
+               btrfs_disk_key_to_cpu(&cpukey, &leaf->items[i + 1].key);
+               if (btrfs_comp_keys(&leaf->items[i].key,
+                                &cpukey) >= 0)
+                       return 1;
+               if (btrfs_item_offset(leaf->items + i) !=
+                       btrfs_item_end(leaf->items + i + 1))
+                       return 1;
+               if (i == 0) {
+                       if (btrfs_item_offset(leaf->items + i) +
+                              btrfs_item_size(leaf->items + i) !=
+                              BTRFS_LEAF_DATA_SIZE(root))
+                               return 1;
+               }
+       }
+       return 0;
+}
+
+static int check_block(struct btrfs_root *root,
+                      struct radix_tree_root *extent_radix,
+                      struct btrfs_buffer *buf)
+{
+       struct extent_record *rec;
+
+       rec = radix_tree_lookup(extent_radix, buf->blocknr);
+       if (!rec)
+               return 1;
+       if (btrfs_is_leaf(&buf->node)) {
+               return check_leaf(root, &rec->parent_key, &buf->leaf);
+       } else {
+               return check_node(root, &rec->parent_key, &buf->node);
+       }
+       return 1;
+}
+
 static int add_extent_rec(struct radix_tree_root *extent_radix,
-                         u64 ref, u64 start, u64 nr, u64 owner, u8 type)
+                         struct btrfs_disk_key *parent_key,
+                         u64 ref, u64 start, u64 nr, u64 owner, u8 type,
+                         int inc_ref)
 {
        struct extent_record *rec;
        int ret = 0;
        rec = radix_tree_lookup(extent_radix, start);
        if (rec) {
-               rec->refs++;
+               if (inc_ref)
+                       rec->refs++;
                if (owner != rec->owner) {
                        fprintf(stderr, "warning, owner mismatch %Lu\n", start);
                        ret = 1;
@@ -46,6 +143,10 @@ static int add_extent_rec(struct radix_tree_root *extent_radix,
        rec->nr = nr;
        rec->owner = owner;
        rec->type = type;
+       if (parent_key)
+               memcpy(&rec->parent_key, parent_key, sizeof(*parent_key));
+       else
+               memset(&rec->parent_key, 0, sizeof(*parent_key));
        ret = radix_tree_insert(extent_radix, start, rec);
        BUG_ON(ret);
        blocks_used += nr;
@@ -62,10 +163,36 @@ static int add_pending(struct radix_tree_root *pending,
        return 0;
 }
 
+static int pick_next_pending(struct radix_tree_root *pending,
+                       struct radix_tree_root *reada,
+                       struct radix_tree_root *nodes,
+                       u64 last, unsigned long *bits, int bits_nr)
+{
+       unsigned long node_start = last;
+       int ret;
+       ret = find_first_radix_bit(reada, bits, 0, 1);
+       if (ret)
+               return ret;
+       if (node_start > 8)
+               node_start -= 8;
+       ret = find_first_radix_bit(nodes, bits, node_start, bits_nr);
+       if (!ret)
+               ret = find_first_radix_bit(nodes, bits, 0, bits_nr);
+       if (ret)
+               return ret;
+       return find_first_radix_bit(pending, bits, 0, bits_nr);
+}
+
+static struct btrfs_buffer reada_buf;
+
 static int run_next_block(struct btrfs_root *root,
+                         unsigned long *bits,
+                         int bits_nr,
                          u64 *last,
                          struct radix_tree_root *pending,
                          struct radix_tree_root *seen,
+                         struct radix_tree_root *reada,
+                         struct radix_tree_root *nodes,
                          struct radix_tree_root *extent_radix)
 {
        struct btrfs_buffer *buf;
@@ -75,19 +202,34 @@ static int run_next_block(struct btrfs_root *root,
        int nritems;
        struct btrfs_leaf *leaf;
        struct btrfs_node *node;
-       unsigned long bits;
+       u64 last_block = 0;
 
-       ret = find_first_radix_bit(pending, &bits, *last, 1);
+       ret = pick_next_pending(pending, reada, nodes, *last, bits, bits_nr);
        if (ret == 0) {
-               ret = find_first_radix_bit(pending, &bits, 0, 1);
-               if (ret == 0)
-                       return 1;
+               return 1;
+       }
+       for(i = 0; i < ret; i++) {
+               u64 offset;
+               if (test_radix_bit(reada, bits[i]))
+                       continue;
+               set_radix_bit(reada, bits[i]);
+               btrfs_map_bh_to_logical(root, &reada_buf, bits[i]);
+               offset = reada_buf.dev_blocknr * root->blocksize;
+               last_block = bits[i];
+               readahead(reada_buf.fd, offset, root->blocksize);
        }
-       *last = bits;
-       blocknr = bits;
+
+       *last = bits[0];
+       blocknr = bits[0];
        clear_radix_bit(pending, blocknr);
+       clear_radix_bit(reada, blocknr);
+       clear_radix_bit(nodes, blocknr);
        buf = read_tree_block(root, blocknr);
        nritems = btrfs_header_nritems(&buf->node.header);
+       ret = check_block(root, extent_radix, buf);
+       if (ret) {
+               fprintf(stderr, "bad block %Lu\n", blocknr);
+       }
        if (btrfs_is_leaf(&buf->node)) {
                leaf = &buf->leaf;
                for (i = 0; i < nritems; i++) {
@@ -100,22 +242,30 @@ static int run_next_block(struct btrfs_root *root,
                        if (btrfs_file_extent_type(fi) !=
                            BTRFS_FILE_EXTENT_REG)
                                continue;
-                       ret = add_extent_rec(extent_radix, blocknr,
+                       ret = add_extent_rec(extent_radix, NULL, blocknr,
                                   btrfs_file_extent_disk_blocknr(fi),
                                   btrfs_file_extent_disk_num_blocks(fi),
                                   btrfs_disk_key_objectid(&leaf->items[i].key),
-                                  BTRFS_EXTENT_FILE);
+                                  BTRFS_EXTENT_FILE, 1);
                        BUG_ON(ret);
                }
        } else {
+               int level;
                node = &buf->node;
+               level = btrfs_header_level(&node->header);
                for (i = 0; i < nritems; i++) {
                        u64 ptr = btrfs_node_blockptr(node, i);
-                       ret = add_extent_rec(extent_radix, blocknr, ptr, 1,
+                       ret = add_extent_rec(extent_radix,
+                                            &node->ptrs[i].key,
+                                            blocknr, ptr, 1,
                                             btrfs_header_owner(&node->header),
-                                            BTRFS_EXTENT_TREE);
+                                            BTRFS_EXTENT_TREE, 1);
                        BUG_ON(ret);
-                       add_pending(pending, seen, ptr);
+                       if (level > 1) {
+                               add_pending(nodes, seen, ptr);
+                       } else {
+                               add_pending(pending, seen, ptr);
+                       }
                }
        }
        btrfs_block_release(root, buf);
@@ -123,42 +273,63 @@ static int run_next_block(struct btrfs_root *root,
 }
 
 static int add_root_to_pending(struct btrfs_root *root,
+                              unsigned long *bits,
+                              int bits_nr,
                               struct radix_tree_root *extent_radix,
                               struct radix_tree_root *pending,
-                              struct radix_tree_root *seen)
+                              struct radix_tree_root *seen,
+                              struct radix_tree_root *reada,
+                              struct radix_tree_root *nodes)
 {
        add_pending(pending, seen, root->node->blocknr);
-       add_extent_rec(extent_radix, 0, root->node->blocknr, 1,
+       add_extent_rec(extent_radix, NULL, 0, root->node->blocknr, 1,
                       btrfs_header_owner(&root->node->node.header),
-                      BTRFS_EXTENT_TREE);
+                      BTRFS_EXTENT_TREE, 1);
        return 0;
 }
+
 int main(int ac, char **av) {
        struct btrfs_super_block super;
        struct btrfs_root *root;
        struct radix_tree_root extent_radix;
        struct radix_tree_root seen;
        struct radix_tree_root pending;
+       struct radix_tree_root reada;
+       struct radix_tree_root nodes;
        int ret;
        u64 last = 0;
+       unsigned long *bits;
+       int bits_nr;
 
        radix_tree_init();
 
+
        INIT_RADIX_TREE(&extent_radix, GFP_NOFS);
        init_bit_radix(&seen);
        init_bit_radix(&pending);
+       init_bit_radix(&reada);
+       init_bit_radix(&nodes);
 
        root = open_ctree(av[1], &super);
-       add_root_to_pending(root, &extent_radix, &pending, &seen);
-       add_root_to_pending(root->fs_info->tree_root,&extent_radix,
-                           &pending, &seen);
-       add_root_to_pending(root->fs_info->dev_root, &extent_radix,
-                           &pending, &seen);
-       add_root_to_pending(root->fs_info->extent_root, &extent_radix,
-                           &pending, &seen);
+
+       bits_nr = 1024 * 1024 / root->blocksize;
+       bits = malloc(bits_nr * sizeof(unsigned long));
+       if (!bits) {
+               perror("malloc");
+               exit(1);
+       }
+
+       add_root_to_pending(root, bits, bits_nr, &extent_radix,
+                           &pending, &seen, &reada, &nodes);
+       add_root_to_pending(root->fs_info->tree_root, bits, bits_nr,
+                           &extent_radix, &pending, &seen, &reada, &nodes);
+       add_root_to_pending(root->fs_info->dev_root, bits, bits_nr,
+                           &extent_radix, &pending, &seen, &reada, &nodes);
+       add_root_to_pending(root->fs_info->extent_root, bits, bits_nr,
+                           &extent_radix, &pending, &seen, &reada, &nodes);
        while(1) {
-               ret = run_next_block(root, &last, &pending,
-                                    &seen, &extent_radix);
+               ret = run_next_block(root, bits, bits_nr, &last, &pending,
+                                    &seen, &reada, &nodes, &extent_radix);
                if (ret != 0)
                        break;
        }
diff --git a/ctree.c b/ctree.c
index 0c85b0b..ab8f9d0 100644 (file)
--- a/ctree.c
+++ b/ctree.c
@@ -83,22 +83,44 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root,
 }
 
 /*
+ * how many bytes are required to store the items in a leaf.  start
+ * and nr indicate which items in the leaf to check.  This totals up the
+ * space used both by the item structs and the item data
+ */
+static int leaf_space_used(struct btrfs_leaf *l, int start, int nr)
+{
+       int data_len;
+       int nritems = btrfs_header_nritems(&l->header);
+       int end;
+
+       if (nritems < start + nr)
+               end = nritems - 1;
+       else
+               end = start + nr - 1;
+
+       if (!nr)
+               return 0;
+       data_len = btrfs_item_end(l->items + start);
+       data_len = data_len - btrfs_item_offset(l->items + end);
+       data_len += sizeof(struct btrfs_item) * nr;
+       return data_len;
+}
+
+/*
  * The space between the end of the leaf items and
  * the start of the leaf data.  IOW, how much room
  * the leaf has left for both items and data
  */
 int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf)
 {
-       int data_end = leaf_data_end(root, leaf);
        int nritems = btrfs_header_nritems(&leaf->header);
-       char *items_end = (char *)(leaf->items + nritems + 1);
-       return (char *)(btrfs_leaf_data(leaf) + data_end) - (char *)items_end;
+       return BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
 }
 
 /*
  * compare two keys in a memcmp fashion
  */
-static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
+int btrfs_comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
 {
        struct btrfs_key k1;
 
@@ -144,7 +166,7 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path,
        for (i = 0; nritems > 1 && i < nritems - 2; i++) {
                struct btrfs_key cpukey;
                btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[i + 1].key);
-               BUG_ON(comp_keys(&node->ptrs[i].key, &cpukey) >= 0);
+               BUG_ON(btrfs_comp_keys(&node->ptrs[i].key, &cpukey) >= 0);
        }
        return 0;
 }
@@ -177,7 +199,7 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
        for (i = 0; nritems > 1 && i < nritems - 2; i++) {
                struct btrfs_key cpukey;
                btrfs_disk_key_to_cpu(&cpukey, &leaf->items[i + 1].key);
-               BUG_ON(comp_keys(&leaf->items[i].key,
+               BUG_ON(btrfs_comp_keys(&leaf->items[i].key,
                                 &cpukey) >= 0);
                BUG_ON(btrfs_item_offset(leaf->items + i) !=
                        btrfs_item_end(leaf->items + i + 1));
@@ -219,7 +241,7 @@ static int generic_bin_search(char *p, int item_size, struct btrfs_key *key,
        while(low < high) {
                mid = (low + high) / 2;
                tmp = (struct btrfs_disk_key *)(p + mid * item_size);
-               ret = comp_keys(tmp, key);
+               ret = btrfs_comp_keys(tmp, key);
 
                if (ret < 0)
                        low = mid + 1;
@@ -775,24 +797,6 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
 }
 
 /*
- * how many bytes are required to store the items in a leaf.  start
- * and nr indicate which items in the leaf to check.  This totals up the
- * space used both by the item structs and the item data
- */
-static int leaf_space_used(struct btrfs_leaf *l, int start, int nr)
-{
-       int data_len;
-       int end = start + nr - 1;
-
-       if (!nr)
-               return 0;
-       data_len = btrfs_item_end(l->items + start);
-       data_len = data_len - btrfs_item_offset(l->items + end);
-       data_len += sizeof(struct btrfs_item) * nr;
-       return data_len;
-}
-
-/*
  * push some data in the path leaf to the right, trying to free up at
  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
  *
diff --git a/ctree.h b/ctree.h
index 55a323e..e2ea14f 100644 (file)
--- a/ctree.h
+++ b/ctree.h
@@ -980,6 +980,7 @@ static inline void btrfs_set_device_id(struct btrfs_device_item *d,
        ((type *)(btrfs_leaf_data(leaf) + \
        btrfs_item_offset((leaf)->items + (slot))))
 
+int btrfs_comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2);
 int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
                      *root, struct btrfs_path *path, u32 data_size);
 struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
index 871900c..0d921f1 100644 (file)
--- a/disk-io.c
+++ b/disk-io.c
@@ -1,4 +1,5 @@
-#define _XOPEN_SOURCE 500
+#define _XOPEN_SOURCE 600
+#define __USE_XOPEN2K
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/types.h>
@@ -336,6 +337,9 @@ int btrfs_open_disk(struct btrfs_root *root, u64 device_id,
                ret = -1;
                goto out;
        }
+
+       posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM);
+       posix_fadvise(fd, 0, 0, POSIX_FADV_NOREUSE);
        ret = btrfs_insert_dev_radix(root, fd, device_id,
                                     block_start, num_blocks);
        BUG_ON(ret);
index 87cf36b..e856794 100644 (file)
--- a/disk-io.h
+++ b/disk-io.h
@@ -31,6 +31,8 @@ int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s);
 void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf);
 int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                      struct btrfs_super_block *s);
+int btrfs_map_bh_to_logical(struct btrfs_root *root, struct btrfs_buffer *bh,
+                            u64 logical);
 #define BTRFS_SUPER_INFO_OFFSET (16 * 1024)
 
 #endif