Btrfsck: add the ability to prune corrupt extent allocation tree blocks
author Chris Mason <chris.mason@oracle.com>
Tue, 21 Feb 2012 19:37:21 +0000 (14:37 -0500)
committer Chris Mason <chris.mason@oracle.com>
Tue, 21 Feb 2012 19:37:21 +0000 (14:37 -0500)
When we discover bad blocks in the extent allocation tree, repair can
now discard them and recreate the references from the rest of the trees.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
Makefile
btrfs-corrupt-block.c
btrfsck.c
ctree.c
ctree.h
repair.c [new file with mode: 0644]
repair.h [new file with mode: 0644]

diff --git a/Makefile b/Makefile
index 3b887e1..79818e6 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,7 @@ CFLAGS = -g -O0
 objects = ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \
          root-tree.o dir-item.o file-item.o inode-item.o \
          inode-map.o crc32c.o rbtree.o extent-cache.o extent_io.o \
-         volumes.o utils.o btrfs-list.o btrfslabel.o
+         volumes.o utils.o btrfs-list.o btrfslabel.o repair.o
 cmds_objects = cmds-subvolume.o cmds-filesystem.o cmds-device.o cmds-scrub.o \
               cmds-inspect.o cmds-balance.o
 
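diff --git a/btrfs-corrupt-block.c b/btrfs-corrupt-block.c
index 9ad3e05..980a006 100644 (file)
--- a/btrfs-corrupt-block.c
+++ b/btrfs-corrupt-block.c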
@@ -93,6 +93,56 @@ static void print_usage(void)
        exit(1);
 }
 
+static void corrupt_keys(struct btrfs_trans_handle *trans,
+                        struct btrfs_root *root,
+                        struct extent_buffer *eb)
+{
+       int slot;
+       int bad_slot;
+       int nr;
+       struct btrfs_disk_key bad_key;;
+
+       nr = btrfs_header_nritems(eb);
+       if (nr == 0)
+               return;
+
+       slot = rand() % nr;
+       bad_slot = rand() % nr;
+
+       if (bad_slot == slot)
+               return;
+
+       fprintf(stderr, "corrupting keys in block %llu slot %d swapping with %d\n",
+               (unsigned long long)eb->start, slot, bad_slot);
+
+       if (btrfs_header_level(eb) == 0) {
+               btrfs_item_key(eb, &bad_key, bad_slot);
+               btrfs_set_item_key(eb, &bad_key, slot);
+       } else {
+               btrfs_node_key(eb, &bad_key, bad_slot);
+               btrfs_set_node_key(eb, &bad_key, slot);
+       }
+       btrfs_mark_buffer_dirty(eb);
+       if (!trans) {
+               csum_tree_block(root, eb, 0);
+               write_extent_to_disk(eb);
+       }
+}
+
+
+static int corrupt_keys_in_block(struct btrfs_root *root, u64 bytenr)
+{
+       struct extent_buffer *eb;
+
+       eb = read_tree_block(root, bytenr, root->leafsize, 0);
+       if (!eb)
+               return -EIO;;
+
+       corrupt_keys(NULL, root, eb);
+       free_extent_buffer(eb);
+       return 0;
+}
+
 static int corrupt_extent(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root, u64 bytenr, int copy)
 {
@@ -192,6 +242,11 @@ static void btrfs_corrupt_extent_tree(struct btrfs_trans_handle *trans,
        if (!eb)
                return;
 
+       if ((rand() % 10) == 0) {
+               corrupt_keys(trans, root, eb);
+               return;
+       }
+
        nr = btrfs_header_nritems(eb);
        if (btrfs_is_leaf(eb)) {
                btrfs_corrupt_extent_leaf(trans, root, eb);
@@ -222,6 +277,7 @@ static struct option long_options[] = {
        { "bytes", 1, NULL, 'b' },
        { "extent-record", 0, NULL, 'e' },
        { "extent-tree", 0, NULL, 'E' },
+       { "keys", 0, NULL, 'k' },
        { 0, 0, 0, 0}
 };
 
@@ -239,12 +295,13 @@ int main(int ac, char **av)
        u64 bytes = 4096;
        int extent_rec = 0;
        int extent_tree = 0;
+       int corrupt_block_keys = 0;
 
        srand(128);
 
        while(1) {
                int c;
-               c = getopt_long(ac, av, "l:c:eE", long_options,
+               c = getopt_long(ac, av, "l:c:eEk", long_options,
                                &option_index);
                if (c < 0)
                        break;
@@ -279,6 +336,9 @@ int main(int ac, char **av)
                        case 'E':
                                extent_tree = 1;
                                break;
+                       case 'k':
+                               corrupt_block_keys = 1;
+                               break;
                        default:
                                print_usage();
                }
@@ -324,8 +384,13 @@ int main(int ac, char **av)
        bytes *= root->sectorsize;
 
        while (bytes > 0) {
-               eb = debug_corrupt_block(root, logical, root->sectorsize, copy);
-               free_extent_buffer(eb);
+               if (corrupt_block_keys) {
+                       corrupt_keys_in_block(root, logical);
+               } else {
+                       eb = debug_corrupt_block(root, logical,
+                                                root->sectorsize, copy);
+                       free_extent_buffer(eb);
+               }
                logical += root->sectorsize;
                bytes -= root->sectorsize;
        }
diff --git a/btrfsck.c b/btrfsck.c
index 1935dd7..127f119 100644 (file)
--- a/btrfsck.c
+++ b/btrfsck.c
@@ -26,6 +26,7 @@
 #include <getopt.h>
 #include "kerncompat.h"
 #include "ctree.h"
+#include "repair.h"
 #include "disk-io.h"
 #include "print-tree.h"
 #include "transaction.h"
@@ -1718,86 +1719,6 @@ static int check_fs_roots(struct btrfs_root *root,
        return err;
 }
 
-static int check_node(struct btrfs_root *root,
-                     struct btrfs_disk_key *parent_key,
-                     struct extent_buffer *buf)
-{
-       int i;
-       struct btrfs_key cpukey;
-       struct btrfs_disk_key key;
-       u32 nritems = btrfs_header_nritems(buf);
-
-       if (nritems == 0 || nritems > BTRFS_NODEPTRS_PER_BLOCK(root))
-               return 1;
-       if (parent_key->type) {
-               btrfs_node_key(buf, &key, 0);
-               if (memcmp(parent_key, &key, sizeof(key)))
-                       return 1;
-       }
-       for (i = 0; nritems > 1 && i < nritems - 2; i++) {
-               btrfs_node_key(buf, &key, i);
-               btrfs_node_key_to_cpu(buf, &cpukey, i + 1);
-               if (btrfs_comp_keys(&key, &cpukey) >= 0)
-                       return 1;
-       }
-       return 0;
-}
-
-static int check_leaf(struct btrfs_root *root,
-                     struct btrfs_disk_key *parent_key,
-                     struct extent_buffer *buf)
-{
-       int i;
-       struct btrfs_key cpukey;
-       struct btrfs_disk_key key;
-       u32 nritems = btrfs_header_nritems(buf);
-
-       if (btrfs_header_level(buf) != 0) {
-               fprintf(stderr, "leaf is not a leaf %llu\n",
-                      (unsigned long long)btrfs_header_bytenr(buf));
-               return 1;
-       }
-       if (btrfs_leaf_free_space(root, buf) < 0) {
-               fprintf(stderr, "leaf free space incorrect %llu %d\n",
-                       (unsigned long long)btrfs_header_bytenr(buf),
-                       btrfs_leaf_free_space(root, buf));
-               return 1;
-       }
-
-       if (nritems == 0)
-               return 0;
-
-       btrfs_item_key(buf, &key, 0);
-       if (parent_key->type && memcmp(parent_key, &key, sizeof(key))) {
-               fprintf(stderr, "leaf parent key incorrect %llu\n",
-                      (unsigned long long)btrfs_header_bytenr(buf));
-               return 1;
-       }
-       for (i = 0; nritems > 1 && i < nritems - 2; i++) {
-               btrfs_item_key(buf, &key, i);
-               btrfs_item_key_to_cpu(buf, &cpukey, i + 1);
-               if (btrfs_comp_keys(&key, &cpukey) >= 0) {
-                       fprintf(stderr, "bad key ordering %d %d\n", i, i+1);
-                       return 1;
-               }
-               if (btrfs_item_offset_nr(buf, i) !=
-                       btrfs_item_end_nr(buf, i + 1)) {
-                       fprintf(stderr, "incorrect offsets %u %u\n",
-                               btrfs_item_offset_nr(buf, i),
-                               btrfs_item_end_nr(buf, i + 1));
-                       return 1;
-               }
-               if (i == 0 && btrfs_item_end_nr(buf, i) !=
-                   BTRFS_LEAF_DATA_SIZE(root)) {
-                       fprintf(stderr, "bad item end %u wanted %u\n",
-                               btrfs_item_end_nr(buf, i),
-                               (unsigned)BTRFS_LEAF_DATA_SIZE(root));
-                       return 1;
-               }
-       }
-       return 0;
-}
-
 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
 {
        struct list_head *cur = rec->backrefs.next;
@@ -1954,7 +1875,7 @@ static int check_owner_ref(struct btrfs_root *root,
                btrfs_item_key_to_cpu(buf, &key, 0);
        else
                btrfs_node_key_to_cpu(buf, &key, 0);
-       
+
        btrfs_init_path(&path);
        path.lowest_level = level + 1;
        btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
@@ -1967,6 +1888,48 @@ static int check_owner_ref(struct btrfs_root *root,
        return found ? 0 : 1;
 }
 
+static int is_extent_tree_record(struct extent_record *rec)
+{
+       struct list_head *cur = rec->backrefs.next;
+       struct extent_backref *node;
+       struct tree_backref *back;
+       int is_extent = 0;
+
+       while(cur != &rec->backrefs) {
+               node = list_entry(cur, struct extent_backref, list);
+               cur = cur->next;
+               if (node->is_data)
+                       return 0;
+               back = (struct tree_backref *)node;
+               if (node->full_backref)
+                       return 0;
+               if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
+                       is_extent = 1;
+       }
+       return is_extent;
+}
+
+
+static int record_bad_block_io(struct btrfs_fs_info *info,
+                              struct cache_tree *extent_cache,
+                              u64 start, u64 len)
+{
+       struct extent_record *rec;
+       struct cache_extent *cache;
+       struct btrfs_key key;
+
+       cache = find_cache_extent(extent_cache, start, len);
+       if (!cache)
+               return 0;
+
+       rec = container_of(cache, struct extent_record, cache);
+       if (!is_extent_tree_record(rec))
+               return 0;
+
+       btrfs_disk_key_to_cpu(&key, &rec->parent_key);
+       return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
+}
+
 static int check_block(struct btrfs_root *root,
                       struct cache_tree *extent_cache,
                       struct extent_buffer *buf, u64 flags)
@@ -1995,11 +1958,11 @@ static int check_block(struct btrfs_root *root,
        }
        rec->info_level = level;
 
-       if (btrfs_is_leaf(buf)) {
-               ret = check_leaf(root, &rec->parent_key, buf);
-       } else {
-               ret = check_node(root, &rec->parent_key, buf);
-       }
+       if (btrfs_is_leaf(buf))
+               ret = btrfs_check_leaf(root, &rec->parent_key, buf);
+       else
+               ret = btrfs_check_node(root, &rec->parent_key, buf);
+
        if (ret) {
                fprintf(stderr, "bad block %llu\n",
                        (unsigned long long)buf->start);
@@ -2550,6 +2513,13 @@ static int run_next_block(struct btrfs_root *root,
 
        /* fixme, get the real parent transid */
        buf = read_tree_block(root, bytenr, size, 0);
+       if (!extent_buffer_uptodate(buf)) {
+               record_bad_block_io(root->fs_info,
+                                   extent_cache, bytenr, size);
+               free_extent_buffer(buf);
+               goto out;
+       }
+
        nritems = btrfs_header_nritems(buf);
 
        ret = btrfs_lookup_extent_info(NULL, root, bytenr, size, NULL, &flags);
@@ -2565,6 +2535,8 @@ static int run_next_block(struct btrfs_root *root,
        }
 
        ret = check_block(root, extent_cache, buf, flags);
+       if (ret)
+               goto out;
 
        if (btrfs_is_leaf(buf)) {
                btree_space_waste += btrfs_leaf_free_space(root, buf);
@@ -2691,6 +2663,7 @@ static int run_next_block(struct btrfs_root *root,
            btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
            !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
                found_old_backref = 1;
+out:
        free_extent_buffer(buf);
        return 0;
 }
@@ -3016,6 +2989,7 @@ static int fixup_extent_refs(struct btrfs_trans_handle *trans,
        int ret;
        struct btrfs_path *path;
        struct list_head *cur = rec->backrefs.next;
+       struct cache_extent *cache;
        struct extent_backref *back;
        int allocated = 0;
        u64 flags = 0;
@@ -3035,6 +3009,13 @@ static int fixup_extent_refs(struct btrfs_trans_handle *trans,
        if (ret < 0)
                goto out;
 
+       /* was this block corrupt?  If so, don't add references to it */
+       cache = find_cache_extent(info->corrupt_blocks, rec->start, rec->max_size);
+       if (cache) {
+               ret = 0;
+               goto out;
+       }
+
        /* step two, recreate all the refs we did find */
        while(cur != &rec->backrefs) {
                back = list_entry(cur, struct extent_backref, list);
@@ -3058,6 +3039,107 @@ out:
        return ret;
 }
 
+/* right now we only prune from the extent allocation tree */
+static int prune_one_block(struct btrfs_trans_handle *trans,
+                          struct btrfs_fs_info *info,
+                          struct btrfs_corrupt_block *corrupt)
+{
+       int ret;
+       struct btrfs_path path;
+       struct extent_buffer *eb;
+       u64 found;
+       int slot;
+       int nritems;
+       int level = corrupt->level + 1;
+
+       btrfs_init_path(&path);
+again:
+       /* we want to stop at the parent to our busted block */
+       path.lowest_level = level;
+
+       ret = btrfs_search_slot(trans, info->extent_root,
+                               &corrupt->key, &path, -1, 1);
+
+       if (ret < 0)
+               goto out;
+
+       eb = path.nodes[level];
+       if (!eb) {
+               ret = -ENOENT;
+               goto out;
+       }
+
+       /*
+        * hopefully the search gave us the block we want to prune,
+        * lets try that first
+        */
+       slot = path.slots[level];
+       found =  btrfs_node_blockptr(eb, slot);
+       if (found == corrupt->cache.start)
+               goto del_ptr;
+
+       nritems = btrfs_header_nritems(eb);
+
+       /* the search failed, lets scan this node and hope we find it */
+       for (slot = 0; slot < nritems; slot++) {
+               found =  btrfs_node_blockptr(eb, slot);
+               if (found == corrupt->cache.start)
+                       goto del_ptr;
+       }
+       /*
+        * we couldn't find the bad block.  TODO, search all the nodes for pointers
+        * to this block
+        */
+       if (eb == info->extent_root->node) {
+               ret = -ENOENT;
+               goto out;
+       } else {
+               level++;
+               btrfs_release_path(NULL, &path);
+               goto again;
+       }
+
+del_ptr:
+       printk("deleting pointer to block %Lu\n", corrupt->cache.start);
+       ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
+
+out:
+       btrfs_release_path(NULL, &path);
+       return ret;
+}
+
+static int prune_corrupt_blocks(struct btrfs_trans_handle *trans,
+                               struct btrfs_fs_info *info)
+{
+       struct cache_extent *cache;
+       struct btrfs_corrupt_block *corrupt;
+
+       cache = find_first_cache_extent(info->corrupt_blocks, 0);
+       while (1) {
+               if (!cache)
+                       break;
+               corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
+               prune_one_block(trans, info, corrupt);
+               cache = next_cache_extent(cache);
+       }
+       return 0;
+}
+
+static void free_corrupt_blocks(struct btrfs_fs_info *info)
+{
+       struct cache_extent *cache;
+       struct btrfs_corrupt_block *corrupt;
+
+       while (1) {
+               cache = find_first_cache_extent(info->corrupt_blocks, 0);
+               if (!cache)
+                       break;
+               corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
+               remove_cache_extent(info->corrupt_blocks, cache);
+               free(corrupt);
+       }
+}
+
 static int check_extent_refs(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root,
                             struct cache_tree *extent_cache, int repair)
@@ -3082,6 +3164,16 @@ static int check_extent_refs(struct btrfs_trans_handle *trans,
                                         rec->start, rec->max_size);
                        cache = next_cache_extent(cache);
                }
+
+               /* pin down all the corrupted blocks too */
+               cache = find_first_cache_extent(root->fs_info->corrupt_blocks, 0);
+               while(cache) {
+                       rec = container_of(cache, struct extent_record, cache);
+                       btrfs_pin_extent(root->fs_info,
+                                        rec->start, rec->max_size);
+                       cache = next_cache_extent(cache);
+               }
+               prune_corrupt_blocks(trans, root->fs_info);
        }
        while(1) {
                fixed = 0;
@@ -3159,6 +3251,7 @@ static int check_extents(struct btrfs_trans_handle *trans,
        struct cache_tree pending;
        struct cache_tree reada;
        struct cache_tree nodes;
+       struct cache_tree corrupt_blocks;
        struct btrfs_path path;
        struct btrfs_key key;
        struct btrfs_key found_key;
@@ -3175,10 +3268,12 @@ static int check_extents(struct btrfs_trans_handle *trans,
        cache_tree_init(&pending);
        cache_tree_init(&nodes);
        cache_tree_init(&reada);
+       cache_tree_init(&corrupt_blocks);
 
        if (repair) {
                root->fs_info->fsck_extent_cache = &extent_cache;
                root->fs_info->free_extent_hook = free_extent_hook;
+               root->fs_info->corrupt_blocks = &corrupt_blocks;
        }
 
        bits_nr = 1024;
@@ -3241,8 +3336,10 @@ static int check_extents(struct btrfs_trans_handle *trans,
        ret = check_extent_refs(trans, root, &extent_cache, repair);
 
        if (repair) {
+               free_corrupt_blocks(root->fs_info);
                root->fs_info->fsck_extent_cache = NULL;
                root->fs_info->free_extent_hook = NULL;
+               root->fs_info->corrupt_blocks = NULL;
        }
 
        return ret;
diff --git a/ctree.c b/ctree.c
index 282c868..a49bce4 100644 (file)
--- a/ctree.c
+++ b/ctree.c
@@ -19,6 +19,7 @@
 #include "disk-io.h"
 #include "transaction.h"
 #include "print-tree.h"
+#include "repair.h"
 
 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
                      *root, struct btrfs_path *path, int level);
@@ -32,8 +33,6 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              struct extent_buffer *dst_buf,
                              struct extent_buffer *src_buf);
-static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-                  struct btrfs_path *path, int level, int slot);
 
 inline void btrfs_init_path(struct btrfs_path *p)
 {
@@ -589,156 +588,125 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root,
        return btrfs_item_offset_nr(leaf, nr - 1);
 }
 
-static int check_node(struct btrfs_root *root, struct btrfs_path *path,
-                     int level)
+int btrfs_check_node(struct btrfs_root *root,
+                     struct btrfs_disk_key *parent_key,
+                     struct extent_buffer *buf)
 {
-       struct extent_buffer *parent = NULL;
-       struct extent_buffer *node = path->nodes[level];
-       struct btrfs_disk_key parent_key;
-       struct btrfs_disk_key node_key;
-       int parent_slot;
-       int slot;
+       int i;
        struct btrfs_key cpukey;
-       u32 nritems = btrfs_header_nritems(node);
+       struct btrfs_disk_key key;
+       u32 nritems = btrfs_header_nritems(buf);
 
-       if (path->nodes[level + 1])
-               parent = path->nodes[level + 1];
+       if (nritems == 0 || nritems > BTRFS_NODEPTRS_PER_BLOCK(root))
+               goto fail;
 
-       slot = path->slots[level];
-       BUG_ON(nritems == 0);
-       if (parent) {
-               parent_slot = path->slots[level + 1];
-               btrfs_node_key(parent, &parent_key, parent_slot);
-               btrfs_node_key(node, &node_key, 0);
-               BUG_ON(memcmp(&parent_key, &node_key,
-                             sizeof(struct btrfs_disk_key)));
-               BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
-                      btrfs_header_bytenr(node));
-       }
-       BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
-       if (slot != 0) {
-               btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
-               btrfs_node_key(node, &node_key, slot);
-               BUG_ON(btrfs_comp_keys(&node_key, &cpukey) <= 0);
-       }
-       if (slot < nritems - 1) {
-               btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
-               btrfs_node_key(node, &node_key, slot);
-               BUG_ON(btrfs_comp_keys(&node_key, &cpukey) >= 0);
+       if (parent_key && parent_key->type) {
+               btrfs_node_key(buf, &key, 0);
+               if (memcmp(parent_key, &key, sizeof(key)))
+                       goto fail;
+       }
+       for (i = 0; nritems > 1 && i < nritems - 2; i++) {
+               btrfs_node_key(buf, &key, i);
+               btrfs_node_key_to_cpu(buf, &cpukey, i + 1);
+               if (btrfs_comp_keys(&key, &cpukey) >= 0)
+                       goto fail;
        }
        return 0;
+fail:
+       if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) {
+               if (parent_key)
+                       btrfs_disk_key_to_cpu(&cpukey, parent_key);
+               else
+                       btrfs_node_key_to_cpu(buf, &cpukey, 0);
+               btrfs_add_corrupt_extent_record(root->fs_info, &cpukey,
+                                               buf->start, buf->len,
+                                               btrfs_header_level(buf));
+       }
+       return -EIO;
 }
 
-static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
-                     int level)
+int btrfs_check_leaf(struct btrfs_root *root,
+                     struct btrfs_disk_key *parent_key,
+                     struct extent_buffer *buf)
 {
-       struct extent_buffer *leaf = path->nodes[level];
-       struct extent_buffer *parent = NULL;
-       int parent_slot;
+       int i;
        struct btrfs_key cpukey;
-       struct btrfs_disk_key parent_key;
-       struct btrfs_disk_key leaf_key;
-       int slot = path->slots[0];
-
-       u32 nritems = btrfs_header_nritems(leaf);
+       struct btrfs_disk_key key;
+       u32 nritems = btrfs_header_nritems(buf);
 
-       if (path->nodes[level + 1])
-               parent = path->nodes[level + 1];
+       if (btrfs_header_level(buf) != 0) {
+               fprintf(stderr, "leaf is not a leaf %llu\n",
+                      (unsigned long long)btrfs_header_bytenr(buf));
+               goto fail;
+       }
+       if (btrfs_leaf_free_space(root, buf) < 0) {
+               fprintf(stderr, "leaf free space incorrect %llu %d\n",
+                       (unsigned long long)btrfs_header_bytenr(buf),
+                       btrfs_leaf_free_space(root, buf));
+               goto fail;
+       }
 
        if (nritems == 0)
                return 0;
 
-       if (parent) {
-               parent_slot = path->slots[level + 1];
-               btrfs_node_key(parent, &parent_key, parent_slot);
-               btrfs_item_key(leaf, &leaf_key, 0);
-
-               BUG_ON(memcmp(&parent_key, &leaf_key,
-                      sizeof(struct btrfs_disk_key)));
-               BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
-                      btrfs_header_bytenr(leaf));
+       btrfs_item_key(buf, &key, 0);
+       if (parent_key && parent_key->type &&
+           memcmp(parent_key, &key, sizeof(key))) {
+               fprintf(stderr, "leaf parent key incorrect %llu\n",
+                      (unsigned long long)btrfs_header_bytenr(buf));
+               goto fail;
        }
-#if 0
        for (i = 0; nritems > 1 && i < nritems - 2; i++) {
-               btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
-               btrfs_item_key(leaf, &leaf_key, i);
-               if (comp_keys(&leaf_key, &cpukey) >= 0) {
-                       btrfs_print_leaf(root, leaf);
-                       printk("slot %d offset bad key\n", i);
-                       BUG_ON(1);
+               btrfs_item_key(buf, &key, i);
+               btrfs_item_key_to_cpu(buf, &cpukey, i + 1);
+               if (btrfs_comp_keys(&key, &cpukey) >= 0) {
+                       fprintf(stderr, "bad key ordering %d %d\n", i, i+1);
+                       goto fail;
                }
-               if (btrfs_item_offset_nr(leaf, i) !=
-                       btrfs_item_end_nr(leaf, i + 1)) {
-                       btrfs_print_leaf(root, leaf);
-                       printk("slot %d offset bad\n", i);
-                       BUG_ON(1);
+               if (btrfs_item_offset_nr(buf, i) !=
+                       btrfs_item_end_nr(buf, i + 1)) {
+                       fprintf(stderr, "incorrect offsets %u %u\n",
+                               btrfs_item_offset_nr(buf, i),
+                               btrfs_item_end_nr(buf, i + 1));
+                       goto fail;
                }
-               if (i == 0) {
-                       if (btrfs_item_offset_nr(leaf, i) +
-                              btrfs_item_size_nr(leaf, i) !=
-                              BTRFS_LEAF_DATA_SIZE(root)) {
-                               btrfs_print_leaf(root, leaf);
-                               printk("slot %d first offset bad\n", i);
-                               BUG_ON(1);
-                       }
+               if (i == 0 && btrfs_item_end_nr(buf, i) !=
+                   BTRFS_LEAF_DATA_SIZE(root)) {
+                       fprintf(stderr, "bad item end %u wanted %u\n",
+                               btrfs_item_end_nr(buf, i),
+                               (unsigned)BTRFS_LEAF_DATA_SIZE(root));
+                       goto fail;
                }
        }
-       if (nritems > 0) {
-               if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
-                               btrfs_print_leaf(root, leaf);
-                               printk("slot %d bad size \n", nritems - 1);
-                               BUG_ON(1);
-               }
-       }
-#endif
-       if (slot != 0 && slot < nritems - 1) {
-               btrfs_item_key(leaf, &leaf_key, slot);
-               btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
-               if (btrfs_comp_keys(&leaf_key, &cpukey) <= 0) {
-                       btrfs_print_leaf(root, leaf);
-                       printk("slot %d offset bad key\n", slot);
-                       BUG_ON(1);
-               }
-               if (btrfs_item_offset_nr(leaf, slot - 1) !=
-                      btrfs_item_end_nr(leaf, slot)) {
-                       btrfs_print_leaf(root, leaf);
-                       printk("slot %d offset bad\n", slot);
-                       BUG_ON(1);
-               }
-       }
-       if (slot < nritems - 1) {
-               btrfs_item_key(leaf, &leaf_key, slot);
-               btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
-               BUG_ON(btrfs_comp_keys(&leaf_key, &cpukey) >= 0);
-               if (btrfs_item_offset_nr(leaf, slot) !=
-                       btrfs_item_end_nr(leaf, slot + 1)) {
-                       btrfs_print_leaf(root, leaf);
-                       printk("slot %d offset bad\n", slot);
-                       BUG_ON(1);
-               }
-       }
-       BUG_ON(btrfs_item_offset_nr(leaf, 0) +
-              btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
        return 0;
+fail:
+       if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) {
+               if (parent_key)
+                       btrfs_disk_key_to_cpu(&cpukey, parent_key);
+               else
+                       btrfs_item_key_to_cpu(buf, &cpukey, 0);
+
+               btrfs_add_corrupt_extent_record(root->fs_info, &cpukey,
+                                               buf->start, buf->len, 0);
+       }
+       return -EIO;
 }
 
 static int noinline check_block(struct btrfs_root *root,
                                struct btrfs_path *path, int level)
 {
-       return 0;
-#if 0
-       struct extent_buffer *buf = path->nodes[level];
+       struct btrfs_disk_key key;
+       struct btrfs_disk_key *key_ptr = NULL;
+       struct extent_buffer *parent;
 
-       if (memcmp_extent_buffer(buf, root->fs_info->fsid,
-                                (unsigned long)btrfs_header_fsid(buf),
-                                BTRFS_FSID_SIZE)) {
-               printk("warning bad block %Lu\n", buf->start);
-               return 1;
+       if (path->nodes[level + 1]) {
+               parent = path->nodes[level + 1];
+               btrfs_node_key(parent, &key, path->slots[level + 1]);
+               key_ptr = &key;
        }
-#endif
        if (level == 0)
-               return check_leaf(root, path, level);
-       return check_node(root, path, level);
+               return btrfs_check_leaf(root, key_ptr, path->nodes[0]);
+       return btrfs_check_node(root, key_ptr, path->nodes[level]);
 }
 
 /*
@@ -924,8 +892,8 @@ static int balance_level(struct btrfs_trans_handle *trans,
                        wait_on_tree_block_writeback(root, right);
                        free_extent_buffer(right);
                        right = NULL;
-                       wret = del_ptr(trans, root, path, level + 1, pslot +
-                                      1);
+                       wret = btrfs_del_ptr(trans, root, path,
+                                            level + 1, pslot + 1);
                        if (wret)
                                ret = wret;
                        wret = btrfs_free_extent(trans, root, bytenr,
@@ -972,7 +940,7 @@ static int balance_level(struct btrfs_trans_handle *trans,
                wait_on_tree_block_writeback(root, mid);
                free_extent_buffer(mid);
                mid = NULL;
-               wret = del_ptr(trans, root, path, level + 1, pslot);
+               wret = btrfs_del_ptr(trans, root, path, level + 1, pslot);
                if (wret)
                        ret = wret;
                wret = btrfs_free_extent(trans, root, bytenr, blocksize,
@@ -2699,7 +2667,7 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
  * continuing all the way the root if required.  The root is converted into
  * a leaf if all the nodes are emptied.
  */
-static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                   struct btrfs_path *path, int level, int slot)
 {
        struct extent_buffer *parent = path->nodes[level];
@@ -2751,7 +2719,7 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
        int ret;
 
        WARN_ON(btrfs_header_generation(leaf) != trans->transid);
-       ret = del_ptr(trans, root, path, 1, path->slots[1]);
+       ret = btrfs_del_ptr(trans, root, path, 1, path->slots[1]);
        if (ret)
                return ret;
 
diff --git a/ctree.h b/ctree.h
index c96c859..5309059 100644 (file)
--- a/ctree.h
+++ b/ctree.h
@@ -802,7 +802,8 @@ struct btrfs_fs_info {
                                u64 bytenr, u64 num_bytes, u64 parent,
                                u64 root_objectid, u64 owner, u64 offset,
                                int refs_to_drop);
-       struct cache_tree * fsck_extent_cache;
+       struct cache_tree *fsck_extent_cache;
+       struct cache_tree *corrupt_blocks;
 };
 
 /*
@@ -1857,6 +1858,14 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root, u64 bytenr, u64 num,
                             int alloc, int mark_free);
 /* ctree.c */
+int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+                  struct btrfs_path *path, int level, int slot);
+int btrfs_check_node(struct btrfs_root *root,
+                     struct btrfs_disk_key *parent_key,
+                     struct extent_buffer *buf);
+int btrfs_check_leaf(struct btrfs_root *root,
+                     struct btrfs_disk_key *parent_key,
+                     struct extent_buffer *buf);
 int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
                      struct btrfs_root *root);
 void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
diff --git a/repair.c b/repair.c
new file mode 100644 (file)
index 0000000..e640465
--- /dev/null
+++ b/repair.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2012 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include "ctree.h"
+#include "extent-cache.h"
+#include "utils.h"
+#include "repair.h"
+
+int btrfs_add_corrupt_extent_record(struct btrfs_fs_info *info,
+                                   struct btrfs_key *first_key,
+                                   u64 start, u64 len, int level)
+
+{
+       int ret = 0;
+       struct btrfs_corrupt_block *corrupt;
+
+       if (!info->corrupt_blocks)
+               return 0;
+
+       corrupt = malloc(sizeof(*corrupt));
+       if (!corrupt)
+               return -ENOMEM;
+
+       memcpy(&corrupt->key, first_key, sizeof(*first_key));
+       corrupt->cache.start = start;
+       corrupt->cache.size = len;
+       corrupt->level = level;
+
+       ret = insert_existing_cache_extent(info->corrupt_blocks, &corrupt->cache);
+       if (ret)
+               free(corrupt);
+       BUG_ON(ret && ret != -EEXIST);
+       return ret;
+}
+
diff --git a/repair.h b/repair.h
new file mode 100644 (file)
index 0000000..3d0dcb9
--- /dev/null
+++ b/repair.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2012 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_REPAIR__
+#define __BTRFS_REPAIR__
+
+struct btrfs_corrupt_block {
+       struct cache_extent cache;
+       struct btrfs_key key;
+       int level;
+};
+
+int btrfs_add_corrupt_extent_record(struct btrfs_fs_info *info,
+                                   struct btrfs_key *first_key,
+                                   u64 start, u64 len, int level);
+
+#endif