* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
-#define _XOPEN_SOURCE 500
-#define _GNU_SOURCE
+
+#include "kerncompat.h"
+#include "androidcompat.h"
#include <stdio.h>
#include <stdio_ext.h>
#include <uuid/uuid.h>
#include <pthread.h>
-#include "kerncompat.h"
#include "list.h"
#include "radix-tree.h"
#include "ctree.h"
#include "transaction.h"
#include "crc32c.h"
#include "utils.h"
-#include "version.h"
#include "btrfsck.h"
#include "commands.h"
-#define BTRFS_NUM_MIRRORS 2
-
struct recover_control {
int verbose;
int yes;
struct list_head good_chunks;
struct list_head bad_chunks;
+ struct list_head rebuild_chunks;
struct list_head unrepaired_chunks;
pthread_mutex_t rc_lock;
};
struct cache_extent cache;
u64 generation;
u8 csum[BTRFS_CSUM_SIZE];
- struct btrfs_device *devices[BTRFS_NUM_MIRRORS];
- u64 offsets[BTRFS_NUM_MIRRORS];
+ struct btrfs_device *devices[BTRFS_MAX_MIRRORS];
+ u64 offsets[BTRFS_MAX_MIRRORS];
int nmirrors;
};
struct recover_control *rc;
struct btrfs_device *dev;
int fd;
+ u64 bytenr;
};
static struct extent_record *btrfs_new_extent_record(struct extent_buffer *eb)
memcmp(exist->csum, rec->csum, BTRFS_CSUM_SIZE)) {
ret = -EEXIST;
} else {
- BUG_ON(exist->nmirrors >= BTRFS_NUM_MIRRORS);
+ BUG_ON(exist->nmirrors >= BTRFS_MAX_MIRRORS);
exist->devices[exist->nmirrors] = device;
exist->offsets[exist->nmirrors] = offset;
exist->nmirrors++;
INIT_LIST_HEAD(&rc->good_chunks);
INIT_LIST_HEAD(&rc->bad_chunks);
+ INIT_LIST_HEAD(&rc->rebuild_chunks);
INIT_LIST_HEAD(&rc->unrepaired_chunks);
rc->verbose = verbose;
list_del_init(&exist->list);
free(exist);
/*
- * We must do seach again to avoid the following cache.
+ * We must do search again to avoid the following cache.
* /--old bg 1--//--old bg 2--/
* /--new bg--/
*/
return;
printf("CHECK RESULT:\n");
- printf("Healthy Chunks:\n");
+ printf("Recoverable Chunks:\n");
list_for_each_entry(chunk, &rc->good_chunks, list) {
print_chunk_info(chunk, " ");
good++;
total++;
}
- printf("Bad Chunks:\n");
+ list_for_each_entry(chunk, &rc->rebuild_chunks, list) {
+ print_chunk_info(chunk, " ");
+ good++;
+ total++;
+ }
+ list_for_each_entry(chunk, &rc->unrepaired_chunks, list) {
+ print_chunk_info(chunk, " ");
+ good++;
+ total++;
+ }
+ printf("Unrecoverable Chunks:\n");
list_for_each_entry(chunk, &rc->bad_chunks, list) {
print_chunk_info(chunk, " ");
bad++;
total++;
}
printf("\n");
- printf("Total Chunks:\t%d\n", total);
- printf(" Heathy:\t%d\n", good);
- printf(" Bad:\t%d\n", bad);
+ printf("Total Chunks:\t\t%d\n", total);
+ printf(" Recoverable:\t\t%d\n", good);
+ printf(" Unrecoverable:\t%d\n", bad);
printf("\n");
printf("Orphan Block Groups:\n");
printf("Orphan Device Extents:\n");
list_for_each_entry(devext, &rc->devext.no_chunk_orphans, chunk_list)
print_device_extent_info(devext, " ");
+ printf("\n");
}
static int check_chunk_by_metadata(struct recover_control *rc,
bytenr = 0;
while (1) {
+ dev_scan->bytenr = bytenr;
+
if (is_super_block_address(bytenr))
bytenr += rc->sectorsize;
struct btrfs_device *dev;
struct device_scan *dev_scans;
pthread_t *t_scans;
- int *t_rets;
+ long *t_rets;
int devnr = 0;
int devidx = 0;
- int cancel_from = 0;
- int cancel_to = 0;
int i;
+ int all_done;
list_for_each_entry(dev, &rc->fs_devices->devices, dev_list)
devnr++;
t_scans = (pthread_t *)malloc(sizeof(pthread_t) * devnr);
if (!t_scans)
return -ENOMEM;
- t_rets = (int *)malloc(sizeof(int) * devnr);
+ t_rets = (long *)malloc(sizeof(long) * devnr);
if (!t_rets)
return -ENOMEM;
dev_scans[devidx].rc = rc;
dev_scans[devidx].dev = dev;
dev_scans[devidx].fd = fd;
- ret = pthread_create(&t_scans[devidx], NULL,
- (void *)scan_one_device,
- (void *)&dev_scans[devidx]);
- if (ret) {
- cancel_from = 0;
- cancel_to = devidx - 1;
- goto out1;
- }
+ dev_scans[devidx].bytenr = -1;
devidx++;
}
- i = 0;
- while (i < devidx) {
- ret = pthread_join(t_scans[i], (void **)&t_rets[i]);
- if (ret || t_rets[i]) {
- ret = 1;
- cancel_from = i + 1;
- cancel_to = devnr - 1;
+ for (i = 0; i < devidx; i++) {
+ ret = pthread_create(&t_scans[i], NULL,
+ (void *)scan_one_device,
+ (void *)&dev_scans[i]);
+ if (ret)
goto out1;
+
+ dev_scans[i].bytenr = 0;
+ }
+
+ while (1) {
+ all_done = 1;
+ for (i = 0; i < devidx; i++) {
+ if (dev_scans[i].bytenr == -1)
+ continue;
+ ret = pthread_tryjoin_np(t_scans[i],
+ (void **)&t_rets[i]);
+ if (ret == EBUSY) {
+ all_done = 0;
+ continue;
+ }
+ if (ret || t_rets[i]) {
+ ret = 1;
+ goto out1;
+ }
+ dev_scans[i].bytenr = -1;
}
- i++;
+
+ printf("\rScanning: ");
+ for (i = 0; i < devidx; i++) {
+ if (dev_scans[i].bytenr == -1)
+ printf("%sDONE in dev%d",
+ i ? ", " : "", i);
+ else
+ printf("%s%llu in dev%d",
+ i ? ", " : "", dev_scans[i].bytenr, i);
+ }
+ /* overwrite any characters left over from a longer previous line */
+ printf(" ");
+ printf("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b");
+ fflush(stdout);
+
+ if (all_done) {
+ printf("\n");
+ break;
+ }
+
+ sleep(1);
}
out1:
- while (ret && (cancel_from <= cancel_to)) {
- pthread_cancel(t_scans[cancel_from]);
- cancel_from++;
+ for (i = 0; i < devidx; i++) {
+ if (dev_scans[i].bytenr == -1)
+ continue;
+ pthread_cancel(t_scans[i]);
}
out2:
free(dev_scans);
if (ret)
return ret;
}
+ list_for_each_entry(chunk, &rc->rebuild_chunks, list) {
+ ret = build_device_map_by_chunk_record(root, chunk);
+ if (ret)
+ return ret;
+ }
return ret;
}
return ret;
}
-static int block_group_free_all_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+static int block_group_free_all_extent(struct btrfs_root *root,
struct block_group_record *bg)
{
struct btrfs_block_group_cache *cache;
if (ret)
return ret;
- ret = block_group_free_all_extent(trans, root, chunk->bg_rec);
+ ret = block_group_free_all_extent(root, chunk->bg_rec);
if (ret)
return ret;
}
return ret;
}
+/*
+ * Build a chunk item from @chunk_rec and insert it into @chunk_root at
+ * (BTRFS_FIRST_CHUNK_TREE_OBJECTID, CHUNK_ITEM, chunk_rec->offset).
+ *
+ * Returns 0 on success, -ENOMEM if the in-memory chunk item cannot be
+ * allocated, or the error returned by btrfs_insert_item().
+ */
+static int __insert_chunk_item(struct btrfs_trans_handle *trans,
+ struct chunk_record *chunk_rec,
+ struct btrfs_root *chunk_root)
+{
+ struct btrfs_key key;
+ struct btrfs_chunk *chunk = NULL;
+ int ret = 0;
+
+ chunk = create_chunk_item(chunk_rec);
+ if (!chunk)
+ return -ENOMEM;
+ key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+ key.type = BTRFS_CHUNK_ITEM_KEY;
+ key.offset = chunk_rec->offset;
+
+ ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
+ btrfs_chunk_item_size(chunk->num_stripes));
+ /* btrfs_insert_item() copies the item; the staging buffer can go. */
+ free(chunk);
+ return ret;
+}
+
static int __rebuild_chunk_items(struct btrfs_trans_handle *trans,
struct recover_control *rc,
struct btrfs_root *root)
{
- struct btrfs_key key;
- struct btrfs_chunk *chunk = NULL;
struct btrfs_root *chunk_root;
struct chunk_record *chunk_rec;
int ret;
chunk_root = root->fs_info->chunk_root;
list_for_each_entry(chunk_rec, &rc->good_chunks, list) {
- chunk = create_chunk_item(chunk_rec);
- if (!chunk)
- return -ENOMEM;
-
- key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
- key.type = BTRFS_CHUNK_ITEM_KEY;
- key.offset = chunk_rec->offset;
-
- ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
- btrfs_chunk_item_size(chunk->num_stripes));
- free(chunk);
+ ret = __insert_chunk_item(trans, chunk_rec, chunk_root);
+ if (ret)
+ return ret;
+ }
+ list_for_each_entry(chunk_rec, &rc->rebuild_chunks, list) {
+ ret = __insert_chunk_item(trans, chunk_rec, chunk_root);
if (ret)
return ret;
}
}
+/*
+ * Sum the bytes referenced by extent items that fall inside the block group
+ * described by @chunk_rec, walking the extent tree from @path (which must
+ * already be positioned at the first candidate item by the caller).
+ *
+ * On success stores the total into *used and returns 0; on failure returns
+ * the negative error from btrfs_next_leaf() and leaves *used untouched.
+ */
+static int calculate_bg_used(struct btrfs_root *extent_root,
+ struct chunk_record *chunk_rec,
+ struct btrfs_path *path,
+ u64 *used)
+{
+ struct extent_buffer *node;
+ struct btrfs_key found_key;
+ int slot;
+ int ret = 0;
+ u64 used_ret = 0;
+
+ while (1) {
+ node = path->nodes[0];
+ slot = path->slots[0];
+ btrfs_item_key_to_cpu(node, &found_key, slot);
+ /* Walked past the end of this block group: done. */
+ if (found_key.objectid >= chunk_rec->offset + chunk_rec->length)
+ break;
+ /*
+ * NOTE(review): data extents in the extent tree are keyed as
+ * BTRFS_EXTENT_ITEM_KEY, not BTRFS_EXTENT_DATA_KEY -- confirm
+ * this filter matches the intended items.
+ */
+ if (found_key.type != BTRFS_METADATA_ITEM_KEY &&
+ found_key.type != BTRFS_EXTENT_DATA_KEY)
+ goto next;
+ /* For metadata items key.offset is the level, not a length. */
+ if (found_key.type == BTRFS_METADATA_ITEM_KEY)
+ used_ret += extent_root->nodesize;
+ else
+ used_ret += found_key.offset;
+next:
+ if (slot + 1 < btrfs_header_nritems(node)) {
+ /*
+ * Advance the path itself, not a local copy of the
+ * slot: the loop re-reads path->slots[0] at the top,
+ * so bumping only 'slot' would spin on the same item
+ * forever and overcount it.
+ */
+ path->slots[0]++;
+ } else {
+ ret = btrfs_next_leaf(extent_root, path);
+ if (ret > 0) {
+ ret = 0;
+ break;
+ }
+ if (ret < 0)
+ break;
+ }
+ }
+ if (!ret)
+ *used = used_ret;
+ return ret;
+}
+
+/*
+ * Insert a block group item for the chunk described by @chunk_rec into the
+ * extent tree, recording @used bytes as allocated.
+ *
+ * Returns 0 on success or the error from btrfs_insert_item().
+ */
+static int __insert_block_group(struct btrfs_trans_handle *trans,
+ struct chunk_record *chunk_rec,
+ struct btrfs_root *extent_root,
+ u64 used)
+{
+ struct btrfs_block_group_item bg_item;
+ struct btrfs_key key;
+ int ret = 0;
+
+ btrfs_set_block_group_used(&bg_item, used);
+ /*
+ * Block group items always carry the chunk tree objectid here, as
+ * mkfs and the kernel do; the previous code passed 'used' by mistake.
+ */
+ btrfs_set_block_group_chunk_objectid(&bg_item,
+ BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+ btrfs_set_block_group_flags(&bg_item, chunk_rec->type_flags);
+ key.objectid = chunk_rec->offset;
+ key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+ key.offset = chunk_rec->length;
+
+ ret = btrfs_insert_item(trans, extent_root, &key, &bg_item,
+ sizeof(bg_item));
+ return ret;
+}
+
+/*
+ * Search through the extent tree to rebuild the 'used' member of each block
+ * group on the rebuild_chunks list.
+ * However, since the block group items and the extent items share the
+ * extent tree, the extent items may also be missing.
+ * In that case, we fill 'used' with the length of the block group to
+ * ensure no writes into the block group.
+ * Btrfsck will hate it, but we will inform the user to call
+ * '--init-extent-tree' if possible, or just salvage as much data as
+ * possible from the fs.
+ *
+ * Returns 0 on success (also when there is nothing to rebuild), -ENOMEM if
+ * a path cannot be allocated, or a negative error from the btree calls.
+ */
+static int rebuild_block_group(struct btrfs_trans_handle *trans,
+ struct recover_control *rc,
+ struct btrfs_root *root)
+{
+ struct chunk_record *chunk_rec;
+ struct btrfs_key search_key;
+ struct btrfs_path *path;
+ u64 used = 0;
+ int ret = 0;
+
+ if (list_empty(&rc->rebuild_chunks))
+ return 0;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ list_for_each_entry(chunk_rec, &rc->rebuild_chunks, list) {
+ /* Position at the first extent item inside this block group. */
+ search_key.objectid = chunk_rec->offset;
+ search_key.type = BTRFS_EXTENT_ITEM_KEY;
+ search_key.offset = 0;
+ ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
+ &search_key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ ret = calculate_bg_used(root->fs_info->extent_root,
+ chunk_rec, path, &used);
+ /*
+ * Extent tree is damaged, better to rebuild the whole extent
+ * tree. Currently, change the used to chunk's len to prevent
+ * write/block reserve happening in that block group.
+ */
+ if (ret < 0) {
+ fprintf(stderr,
+ "Fail to search extent tree for block group: [%llu,%llu]\n",
+ chunk_rec->offset,
+ chunk_rec->offset + chunk_rec->length);
+ fprintf(stderr,
+ "Mark the block group full to prevent block rsv problems\n");
+ used = chunk_rec->length;
+ }
+ btrfs_release_path(path);
+ ret = __insert_block_group(trans, chunk_rec,
+ root->fs_info->extent_root,
+ used);
+ if (ret < 0)
+ goto out;
+ }
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
static struct btrfs_root *
open_ctree_with_broken_chunk(struct recover_control *rc)
{
disk_super = fs_info->super_copy;
ret = btrfs_read_dev_super(fs_info->fs_devices->latest_bdev,
- disk_super, fs_info->super_bytenr);
+ disk_super, fs_info->super_bytenr, 1);
if (ret) {
fprintf(stderr, "No valid btrfs found\n");
goto out_devices;
return -1;
}
- sb = malloc(sizeof(struct btrfs_super_block));
+ sb = malloc(BTRFS_SUPER_INFO_SIZE);
if (!sb) {
fprintf(stderr, "allocating memory for sb failed.\n");
ret = -ENOMEM;
goto fail_close_fd;
}
- ret = btrfs_read_dev_super(fd, sb, BTRFS_SUPER_INFO_OFFSET);
+ ret = btrfs_read_dev_super(fd, sb, BTRFS_SUPER_INFO_OFFSET, 1);
if (ret) {
fprintf(stderr, "read super block error\n");
goto fail_free_sb;
goto fail_free_sb;
}
- ret = btrfs_scan_fs_devices(fd, path, &fs_devices, 0, 1);
+ ret = btrfs_scan_fs_devices(fd, path, &fs_devices, 0, 1, 0);
if (ret)
goto fail_free_sb;
stripe_nr /= nr_data_stripes;
index = (index + stripe_nr) % chunk->num_stripes;
} else {
- BUG_ON(1);
+ return -1;
}
return index;
}
again:
er = container_of(cache, struct extent_record, cache);
index = btrfs_calc_stripe_index(chunk, er->cache.start);
+ BUG_ON(index == -1);
if (chunk->stripes[index].devid)
goto next;
list_for_each_entry_safe(devext, next, &devexts, chunk_list) {
return num_of_records;
}
+/*
+ * Assign a device extent from @devexts to every stripe slot of @chunk that
+ * does not yet have a device, via insert_stripe(). Stops at the first
+ * failure and returns its error; returns 0 when all empty slots are filled.
+ */
+static int fill_chunk_up(struct chunk_record *chunk, struct list_head *devexts,
+ struct recover_control *rc)
+{
+ int ret = 0;
+ int i;
+
+ for (i = 0; i < chunk->num_stripes; i++) {
+ /* devid == 0 marks a stripe slot not yet mapped to a device. */
+ if (!chunk->stripes[i].devid) {
+ ret = insert_stripe(devexts, rc, chunk, i);
+ if (ret)
+ break;
+ }
+ }
+
+ return ret;
+}
+
#define EQUAL_STRIPE (1 << 0)
static int rebuild_raid_data_chunk_stripes(struct recover_control *rc,
fprintf(stderr, "Fetch csum failed\n");
goto fail_out;
} else if (ret == 1) {
- num_unordered = count_devext_records(&unordered);
if (!(*flags & EQUAL_STRIPE))
*flags |= EQUAL_STRIPE;
goto out;
num_unordered = count_devext_records(&unordered);
if (chunk->type_flags & BTRFS_BLOCK_GROUP_RAID6
&& num_unordered == 2) {
- list_splice_init(&unordered, &chunk->dextents);
btrfs_release_path(&path);
- return 0;
- } else
- ret = 1;
+ ret = fill_chunk_up(chunk, &unordered, rc);
+ return ret;
+ }
- goto fail_out;
+ goto next_stripe;
}
if (list_is_last(candidates.next, &candidates)) {
index = btrfs_calc_stripe_index(chunk,
key.offset + csum_offset * blocksize);
+ BUG_ON(index == -1);
if (chunk->stripes[index].devid)
goto next_stripe;
ret = insert_stripe(&candidates, rc, chunk, index);
& BTRFS_BLOCK_GROUP_RAID5)
|| (num_unordered == 3 && chunk->type_flags
& BTRFS_BLOCK_GROUP_RAID6)) {
- for (i = 0; i < chunk->num_stripes; i++) {
- if (!chunk->stripes[i].devid) {
- ret = insert_stripe(&unordered, rc,
- chunk, i);
- if (ret)
- break;
- }
- }
+ ret = fill_chunk_up(chunk, &unordered, rc);
}
}
fail_out:
ret = insert_cache_extent(&rc->chunk, &chunk->cache);
BUG_ON(ret);
+ list_del_init(&bg->list);
if (!nstripes) {
list_add_tail(&chunk->list, &rc->bad_chunks);
continue;
return 0;
}
+/* Return 1 if the logical byte ranges of the two chunks overlap, else 0. */
+static inline int is_chunk_overlap(struct chunk_record *chunk1,
+ struct chunk_record *chunk2)
+{
+ if (chunk1->offset >= chunk2->offset + chunk2->length ||
+ chunk1->offset + chunk1->length <= chunk2->offset)
+ return 0;
+ return 1;
+}
+
+/*
+ * Move invalid rebuild chunks (those overlapping a good chunk) to the bad
+ * chunk list. A rebuild chunk that intersects a known-good chunk cannot be
+ * trusted, so it is demoted rather than rebuilt.
+ */
+static void validate_rebuild_chunks(struct recover_control *rc)
+{
+ struct chunk_record *good;
+ struct chunk_record *rebuild;
+ struct chunk_record *tmp;
+
+ list_for_each_entry_safe(rebuild, tmp, &rc->rebuild_chunks, list) {
+ list_for_each_entry(good, &rc->good_chunks, list) {
+ if (is_chunk_overlap(rebuild, good)) {
+ list_move_tail(&rebuild->list,
+ &rc->bad_chunks);
+ break;
+ }
+ }
+ }
+}
+
/*
- * Return 0 when succesful, < 0 on error and > 0 if aborted by user
+ * Return 0 when successful, < 0 on error and > 0 if aborted by user
*/
int btrfs_recover_chunk_tree(char *path, int verbose, int yes)
{
print_scan_result(&rc);
ret = check_chunks(&rc.chunk, &rc.bg, &rc.devext, &rc.good_chunks,
- &rc.bad_chunks, 1);
- print_check_result(&rc);
+ &rc.bad_chunks, &rc.rebuild_chunks, 1);
if (ret) {
if (!list_empty(&rc.bg.block_groups) ||
!list_empty(&rc.devext.no_chunk_orphans)) {
if (ret)
goto fail_rc;
}
- /*
- * If the chunk is healthy, its block group item and device
- * extent item should be written on the disks. So, it is very
- * likely that the bad chunk is a old one that has been
- * droppped from the fs. Don't deal with them now, we will
- * check it after the fs is opened.
- */
} else {
- fprintf(stderr, "Check chunks successfully with no orphans\n");
+ print_check_result(&rc);
+ printf("Check chunks successfully with no orphans\n");
goto fail_rc;
}
+ validate_rebuild_chunks(&rc);
+ print_check_result(&rc);
root = open_ctree_with_broken_chunk(&rc);
if (IS_ERR(root)) {
ret = rebuild_sys_array(&rc, root);
BUG_ON(ret);
+ ret = rebuild_block_group(trans, &rc, root);
+ if (ret) {
+ printf("Fail to rebuild block groups.\n");
+ printf("Recommend to run 'btrfs check --init-extent-tree <dev>' after recovery\n");
+ }
+
btrfs_commit_transaction(trans, root);
fail_close_ctree:
close_ctree(root);