Retry metadata reads in the face of checksum failures
author: Chris Mason <chris.mason@oracle.com>
Wed, 9 Apr 2008 20:28:12 +0000 (16:28 -0400)
committer: David Woodhouse <dwmw2@hera.kernel.org>
Wed, 9 Apr 2008 20:28:12 +0000 (16:28 -0400)
disk-io.c
disk-io.h
utils.c
volumes.c
volumes.h

diff --git a/disk-io.c b/disk-io.c
index d49f2ce..8150d73 100644 (file)
--- a/disk-io.c
+++ b/disk-io.c
 static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
 {
        if (buf->start != btrfs_header_bytenr(buf))
-               BUG();
+               return 1;
 
        if (memcmp_extent_buffer(buf, root->fs_info->fsid,
                                 (unsigned long)btrfs_header_fsid(buf),
                                 BTRFS_FSID_SIZE))
-               BUG();
+               return 1;
        return 0;
 }
 
@@ -57,8 +57,8 @@ void btrfs_csum_final(u32 crc, char *result)
        *(__le32 *)result = ~cpu_to_le32(crc);
 }
 
-static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
-                          int verify)
+int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
+                   int verify)
 {
        char result[BTRFS_CRC32_SIZE];
        u32 len;
@@ -70,8 +70,9 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
 
        if (verify) {
                if (memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) {
-                       printk("checksum verify failed on %llu\n",
-                               (unsigned long long)buf->start);
+                       printk("checksum verify failed on %llu wanted %X "
+                              "found %X\n", (unsigned long long)buf->start,
+                              *((int *)result), *((int *)buf));
                        return 1;
                }
        } else {
@@ -112,7 +113,7 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
        dev_nr = 0;
        length = blocksize;
        ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
-                             bytenr, &length, &multi);
+                             bytenr, &length, &multi, 0);
        BUG_ON(ret);
        device = multi->stripes[0].dev;
        device->total_ios++;
@@ -131,6 +132,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
        u64 length;
        struct btrfs_multi_bio *multi = NULL;
        struct btrfs_device *device;
+       int mirror_num = 0;
+       int num_copies;
 
        eb = btrfs_find_create_tree_block(root, bytenr, blocksize);
        if (!eb)
@@ -141,21 +144,35 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
 
        dev_nr = 0;
        length = blocksize;
-       ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
-                             eb->start, &length, &multi);
-       BUG_ON(ret);
-       device = multi->stripes[0].dev;
-       eb->fd = device->fd;
-       device->total_ios++;
-       eb->dev_bytenr = multi->stripes[0].physical;
-       ret = read_extent_from_disk(eb);
-       if (ret) {
-               free_extent_buffer(eb);
-               return NULL;
+       while (1) {
+               ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
+                                     eb->start, &length, &multi, mirror_num);
+               BUG_ON(ret);
+               device = multi->stripes[0].dev;
+               eb->fd = device->fd;
+               device->total_ios++;
+               eb->dev_bytenr = multi->stripes[0].physical;
+               kfree(multi);
+               ret = read_extent_from_disk(eb);
+               if (ret == 0 && check_tree_block(root, eb) == 0 &&
+                   csum_tree_block(root, eb, 1) == 0) {
+                       btrfs_set_buffer_uptodate(eb);
+                       return eb;
+               }
+               num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
+                                             eb->start, eb->len);
+               if (num_copies == 1) {
+printk("reading %Lu failed only one copy\n", eb->start);
+                       break;
+               }
+               mirror_num++;
+               if (mirror_num > num_copies) {
+printk("bailing at mirror %d of %d\n", mirror_num, num_copies);
+                       break;
+               }
        }
-       btrfs_set_buffer_uptodate(eb);
-       kfree(multi);
-       return eb;
+       free_extent_buffer(eb);
+       return NULL;
 }
 
 int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -177,7 +194,7 @@ int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
        dev_nr = 0;
        length = eb->len;
        ret = btrfs_map_block(&root->fs_info->mapping_tree, WRITE,
-                             eb->start, &length, &multi);
+                             eb->start, &length, &multi, 0);
        while(dev_nr < multi->num_stripes) {
                BUG_ON(ret);
                eb->fd = multi->stripes[dev_nr].dev->fd;
diff --git a/disk-io.h b/disk-io.h
index f680721..1b5cbdf 100644 (file)
--- a/disk-io.h
+++ b/disk-io.h
@@ -52,4 +52,6 @@ void btrfs_csum_final(u32 crc, char *result);
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root);
 int btrfs_open_device(struct btrfs_device *dev);
+int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
+                   int verify);
 #endif
diff --git a/utils.c b/utils.c
index 55d3f5f..9d162da 100644 (file)
--- a/utils.c
+++ b/utils.c
@@ -106,6 +106,7 @@ int make_btrfs(int fd, char *device_name,
 
        /* create the tree of root objects */
        memset(buf->data, 0, leafsize);
+       buf->len = leafsize;
        btrfs_set_header_bytenr(buf, blocks[1]);
        btrfs_set_header_nritems(buf, 3);
        btrfs_set_header_generation(buf, 1);
@@ -165,6 +166,7 @@ int make_btrfs(int fd, char *device_name,
        nritems++;
 
 
+       csum_tree_block(NULL, buf, 0);
        ret = pwrite(fd, buf->data, leafsize, blocks[1]);
        BUG_ON(ret != leafsize);
 
@@ -229,6 +231,7 @@ int make_btrfs(int fd, char *device_name,
        btrfs_set_header_bytenr(buf, blocks[2]);
        btrfs_set_header_owner(buf, BTRFS_EXTENT_TREE_OBJECTID);
        btrfs_set_header_nritems(buf, nritems);
+       csum_tree_block(NULL, buf, 0);
        ret = pwrite(fd, buf->data, leafsize, blocks[2]);
        BUG_ON(ret != leafsize);
 
@@ -302,6 +305,7 @@ int make_btrfs(int fd, char *device_name,
        btrfs_set_header_bytenr(buf, blocks[3]);
        btrfs_set_header_owner(buf, BTRFS_CHUNK_TREE_OBJECTID);
        btrfs_set_header_nritems(buf, nritems);
+       csum_tree_block(NULL, buf, 0);
        ret = pwrite(fd, buf->data, leafsize, blocks[3]);
 
        /* create the device tree */
@@ -325,12 +329,14 @@ int make_btrfs(int fd, char *device_name,
        btrfs_set_header_bytenr(buf, blocks[4]);
        btrfs_set_header_owner(buf, BTRFS_DEV_TREE_OBJECTID);
        btrfs_set_header_nritems(buf, nritems);
+       csum_tree_block(NULL, buf, 0);
        ret = pwrite(fd, buf->data, leafsize, blocks[4]);
 
        /* finally create the FS root */
        btrfs_set_header_bytenr(buf, blocks[5]);
        btrfs_set_header_owner(buf, BTRFS_FS_TREE_OBJECTID);
        btrfs_set_header_nritems(buf, 0);
+       csum_tree_block(NULL, buf, 0);
        ret = pwrite(fd, buf->data, leafsize, blocks[5]);
        BUG_ON(ret != leafsize);
 
@@ -338,6 +344,8 @@ int make_btrfs(int fd, char *device_name,
        BUG_ON(sizeof(super) > sectorsize);
        memset(buf->data, 0, sectorsize);
        memcpy(buf->data, &super, sizeof(super));
+       buf->len = sectorsize;
+       csum_tree_block(NULL, buf, 0);
        ret = pwrite(fd, buf->data, sectorsize, blocks[0]);
        BUG_ON(ret != sectorsize);
 
diff --git a/volumes.c b/volumes.c
index ef2f59c..44e6729 100644 (file)
--- a/volumes.c
+++ b/volumes.c
@@ -732,9 +732,29 @@ void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
        cache_tree_init(&tree->cache_tree);
 }
 
+/* Number of copies of the block at @logical; @len is accepted but not used. */
+int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
+{
+       struct cache_extent *ce;
+       struct map_lookup *map;
+       int ret;
+
+       ce = find_first_cache_extent(&map_tree->cache_tree, logical);
+       BUG_ON(!ce);
+       BUG_ON(ce->start > logical || ce->start + ce->size < logical);
+       map = container_of(ce, struct map_lookup, ce);
+
+       /* DUP and RAID1 report one copy per stripe; any other type reports 1 */
+       if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
+               ret = map->num_stripes;
+       else
+               ret = 1;
+       return ret;
+}
+
 int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
                    u64 logical, u64 *length,
-                   struct btrfs_multi_bio **multi_ret)
+                   struct btrfs_multi_bio **multi_ret, int mirror_num)
 {
        struct cache_extent *ce;
        struct map_lookup *map;
@@ -802,11 +822,15 @@ again:
        if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
                if (rw == WRITE)
                        multi->num_stripes = map->num_stripes;
+               else if (mirror_num)
+                       stripe_index = mirror_num - 1;
                else
                        stripe_index = stripe_nr % map->num_stripes;
        } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
                if (rw == WRITE)
                        multi->num_stripes = map->num_stripes;
+               else if (mirror_num)
+                       stripe_index = mirror_num - 1;
        } else {
                /*
                 * after this do_div call, stripe_nr is the number of stripes
diff --git a/volumes.h b/volumes.h
index 9928c1f..be54316 100644 (file)
--- a/volumes.h
+++ b/volumes.h
@@ -85,7 +85,7 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
                           u64 owner, u64 num_bytes, u64 *start);
 int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
                    u64 logical, u64 *length,
-                   struct btrfs_multi_bio **multi_ret);
+                   struct btrfs_multi_bio **multi_ret, int mirror_num);
 int btrfs_read_sys_array(struct btrfs_root *root);
 int btrfs_read_chunk_tree(struct btrfs_root *root);
 int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
@@ -104,4 +104,5 @@ int btrfs_add_device(struct btrfs_trans_handle *trans,
 int btrfs_scan_one_device(int fd, const char *path,
                          struct btrfs_fs_devices **fs_devices_ret,
                          u64 *total_devs, u64 super_offset);
+int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len);
 #endif