2 * Copyright (C) 2008 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
19 #define _XOPEN_SOURCE 500
24 #include <sys/types.h>
30 #include "kerncompat.h"
34 #include "transaction.h"
/*
 * Constants of the metadump image format used throughout this file.
 */
/* Stored in every cluster header on dump and compared against it on restore. */
39 #define HEADER_MAGIC 0xbd5c25e27295668bULL
/* add_extent() flushes the pending extent rather than let it grow past this. */
40 #define MAX_PENDING_SIZE (256 * 1024)
/* Size of a cluster index block; buffer data is padded to this boundary. */
41 #define BLOCK_SIZE 1024
42 #define BLOCK_MASK (BLOCK_SIZE - 1)
/* Values stored in the cluster header's compress field. */
44 #define COMPRESS_NONE 0
45 #define COMPRESS_ZLIB 1
/*
 * One index entry of a cluster.
 * NOTE(review): the members are not visible in this view; other code in this
 * file stores little-endian values into ->bytenr and ->size.
 */
47 struct meta_cluster_item {
50 } __attribute__ ((__packed__));
/*
 * Header at the start of a cluster index block.
 * NOTE(review): the members are not visible in this view; other code in this
 * file accesses ->magic, ->bytenr, ->nritems and ->compress.
 */
52 struct meta_cluster_header {
57 } __attribute__ ((__packed__));
59 /* cluster header + index items + buffers */
/*
 * NOTE(review): the struct's opening line is not visible in this view;
 * ITEMS_PER_CLUSTER and the rest of the file refer to it as struct meta_cluster.
 * items[] has no declared length; it is indexed up to the header's nritems.
 */
61 struct meta_cluster_header header;
62 struct meta_cluster_item items[];
63 } __attribute__ ((__packed__));
/*
 * Number of index items that fit in one BLOCK_SIZE block after
 * sizeof(struct meta_cluster). flush_pending() writes the cluster out once
 * this many items have been queued.
 */
65 #define ITEMS_PER_CLUSTER ((BLOCK_SIZE - sizeof(struct meta_cluster)) / \
66 sizeof(struct meta_cluster_item))
/*
 * NOTE(review): the enclosing struct header and its other members are not
 * visible in this view. These are presumably members of struct async_work,
 * which the rest of the file links through ->list (worker queue) and
 * ->ordered (write-order queue) -- confirm against the full file.
 */
69 struct list_head list;
70 struct list_head ordered;
/*
 * State of one dump (image creation) run; a pointer to it is handed to every
 * dump_worker thread. Only some members are visible in this view.
 */
78 struct metadump_struct {
79 struct btrfs_root *root;
/* index block being filled; allocated with BLOCK_SIZE bytes in metadump_init() */
82 struct meta_cluster *cluster;
/* taken around accesses to the lists below by the worker and flush code */
86 pthread_mutex_t mutex;
/* rb-tree of struct name entries used by find_collision() */
88 struct rb_root name_tree;
/* work queued for dump_worker() */
90 struct list_head list;
/* same work items in the order write_buffers() emits them */
91 struct list_head ordered;
/*
 * State of one restore run; a pointer to it is handed to every restore_worker
 * thread. Only some members are visible in this view.
 */
111 struct mdrestore_struct {
117 pthread_mutex_t mutex;
/* work queued for restore_worker() */
120 struct list_head list;
/* copied from the image's super block by fill_mdres_info() */
124 u8 uuid[BTRFS_UUID_SIZE];
125 u8 fsid[BTRFS_FSID_SIZE];
/* Forward declaration: used by sanitize_name() before its definition later in the file. */
133 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size);
/*
 * Checksum a block in place.
 * crc32c is run over buf past its first BTRFS_CSUM_SIZE bytes, finalized with
 * btrfs_csum_final(), and BTRFS_CRC32_SIZE bytes of the result are copied to
 * the start of buf.
 * NOTE(review): the declaration and seed of crc are not visible in this view.
 */
135 static void csum_block(u8 *buf, size_t len)
137 char result[BTRFS_CRC32_SIZE];
139 crc = crc32c(crc, buf + BTRFS_CSUM_SIZE, len - BTRFS_CSUM_SIZE);
140 btrfs_csum_final(crc, result);
141 memcpy(buf, result, BTRFS_CRC32_SIZE);
/*
 * Classify a key by type. The four types listed are the same ones
 * sanitize_name() dispatches on (dir item/index, inode ref/extref).
 * NOTE(review): the switch header and return statements are not visible in
 * this view; presumably non-zero is returned for these types -- confirm.
 */
144 static int has_name(struct btrfs_key *key)
147 case BTRFS_DIR_ITEM_KEY:
148 case BTRFS_DIR_INDEX_KEY:
149 case BTRFS_INODE_REF_KEY:
150 case BTRFS_INODE_EXTREF_KEY:
/*
 * Allocate a name_len byte buffer and walk it producing one random character
 * per position.
 * NOTE(review): the store into buf, the allocation check and the return are
 * not visible in this view.
 */
159 static char *generate_garbage(u32 name_len)
161 char *buf = malloc(name_len);
167 for (i = 0; i < name_len; i++) {
/* rand() % 94 + 33 yields a value in the range 33..126 */
168 char c = rand() % 94 + 33;
/*
 * Insert ins into the rb-tree of names rooted at root.
 * Entries are compared with memcmp over the first min(ins->len, entry->len)
 * bytes of their val buffers; the node is then linked and the tree recolored.
 * NOTE(review): the descent loop and the branching on dir are not visible in
 * this view.
 */
178 static void tree_insert(struct rb_root *root, struct name *ins)
180 struct rb_node ** p = &root->rb_node;
181 struct rb_node * parent = NULL;
188 entry = rb_entry(parent, struct name, n);
190 len = min(ins->len, entry->len);
191 dir = memcmp(ins->val, entry->val, len);
201 rb_link_node(&ins->n, parent, p);
202 rb_insert_color(&ins->n, root);
/*
 * Look up name in the rb-tree rooted at root, starting from root->rb_node.
 * Each visited entry is compared with memcmp over the first
 * min(entry->len, name_len) bytes, the same ordering tree_insert() uses.
 * NOTE(review): the descent loop and the return statements are not visible in
 * this view.
 */
205 static struct name *name_search(struct rb_root *root, char *name, u32 name_len)
207 struct rb_node *n = root->rb_node;
208 struct name *entry = NULL;
213 entry = rb_entry(n, struct name, n);
215 len = min(entry->len, name_len);
217 dir = memcmp(name, entry->val, len);
/*
 * Find a replacement for name whose crc32c (seeded with ~1) equals that of
 * the original, caching the result in md->name_tree.
 * The tree is searched first; otherwise a struct name and a name_len byte
 * substitute buffer are allocated, and the new entry is inserted into the
 * tree at the end.
 * NOTE(review): many lines (declarations, loop headers, returns, cleanup) are
 * not visible in this view; comments below describe only what is shown.
 */
229 static char *find_collision(struct metadump_struct *md, char *name,
233 unsigned long checksum;
237 val = name_search(&md->name_tree, name, name_len);
243 val = malloc(sizeof(struct name));
245 fprintf(stderr, "Couldn't sanitize name, enomem\n");
249 memset(val, 0, sizeof(*val));
253 val->sub = malloc(name_len);
255 fprintf(stderr, "Couldn't sanitize name, enomem\n");
/* target checksum of the original name; candidates start as all spaces */
260 checksum = crc32c(~1, val->val, name_len);
261 memset(val->sub, ' ', name_len);
/* a candidate qualifies when its crc matches but its bytes differ from the original */
264 if (crc32c(~1, val->sub, name_len) == checksum &&
265 memcmp(val->sub, val->val, val->len)) {
/*
 * The checks below treat 127 as the per-character upper limit and single out
 * '/'; this looks like stepping through candidate strings position by
 * position while skipping '/' -- confirm against the full file.
 */
270 if (val->sub[i] == 127) {
275 } while (val->sub[i] == 127);
280 if (val->sub[i] == '/')
282 memset(val->sub, ' ', i);
287 if (val->sub[i] == '/')
/* fallback: no collision found, fill with random characters in 33..126 */
293 fprintf(stderr, "Couldn't find a collision for '%.*s', "
294 "generating normal garbage, it won't match indexes\n",
296 for (i = 0; i < name_len; i++) {
297 char c = rand() % 94 + 33;
305 tree_insert(&md->name_tree, val);
/*
 * Overwrite the names of all dir items stored in the item at slot of eb.
 * The item is walked until cur reaches its total size; each entry spans
 * sizeof(*dir_item) + name_len + data_len bytes and its name starts right
 * after the btrfs_dir_item struct.
 * The visible code uses find_collision() on the current name when
 * md->sanitize_names > 1 and generate_garbage() as the alternative, then
 * writes the replacement over the name in eb.
 * NOTE(review): the else line, the cleanup and the advance of cur are not
 * visible in this view.
 */
309 static void sanitize_dir_item(struct metadump_struct *md, struct extent_buffer *eb,
312 struct btrfs_dir_item *dir_item;
315 unsigned long name_ptr;
/* set only in sanitize_names == 1 mode, where garbage comes from generate_garbage() */
320 int free_garbage = (md->sanitize_names == 1);
322 dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
323 total_len = btrfs_item_size_nr(eb, slot);
324 while (cur < total_len) {
325 this_len = sizeof(*dir_item) +
326 btrfs_dir_name_len(eb, dir_item) +
327 btrfs_dir_data_len(eb, dir_item);
328 name_ptr = (unsigned long)(dir_item + 1);
329 name_len = btrfs_dir_name_len(eb, dir_item);
331 if (md->sanitize_names > 1) {
332 buf = malloc(name_len);
334 fprintf(stderr, "Couldn't sanitize name, "
338 read_extent_buffer(eb, buf, name_ptr, name_len);
339 garbage = find_collision(md, buf, name_len);
341 garbage = generate_garbage(name_len);
344 fprintf(stderr, "Couldn't sanitize name, enomem\n");
347 write_extent_buffer(eb, garbage, name_ptr, name_len);
349 dir_item = (struct btrfs_dir_item *)((char *)dir_item +
/*
 * Overwrite the names of all inode refs stored in the item at slot of eb.
 * sanitize_name() passes ext = 1 for BTRFS_INODE_EXTREF_KEY items and ext = 0
 * for BTRFS_INODE_REF_KEY items. For an extref the name is at &extref->name;
 * for a plain ref it follows the btrfs_inode_ref struct.
 * Replacement names come from find_collision() when md->sanitize_names > 1
 * and from generate_garbage() otherwise, and are written over the name in eb.
 * NOTE(review): the if/else lines selecting the variants, the cleanup and the
 * advance past the name are not visible in this view.
 */
356 static void sanitize_inode_ref(struct metadump_struct *md,
357 struct extent_buffer *eb, int slot, int ext)
359 struct btrfs_inode_extref *extref;
360 struct btrfs_inode_ref *ref;
363 unsigned long name_ptr;
367 int free_garbage = (md->sanitize_names == 1);
369 item_size = btrfs_item_size_nr(eb, slot);
370 ptr = btrfs_item_ptr_offset(eb, slot);
371 while (cur_offset < item_size) {
/* extended ref variant */
373 extref = (struct btrfs_inode_extref *)(ptr +
375 name_ptr = (unsigned long)(&extref->name);
376 len = btrfs_inode_extref_name_len(eb, extref);
377 cur_offset += sizeof(*extref);
/* plain ref variant */
379 ref = (struct btrfs_inode_ref *)(ptr + cur_offset);
380 len = btrfs_inode_ref_name_len(eb, ref);
381 name_ptr = (unsigned long)(ref + 1);
382 cur_offset += sizeof(*ref);
386 if (md->sanitize_names > 1) {
389 fprintf(stderr, "Couldn't sanitize name, "
393 read_extent_buffer(eb, buf, name_ptr, len);
394 garbage = find_collision(md, buf, len);
396 garbage = generate_garbage(len);
400 fprintf(stderr, "Couldn't sanitize name, enomem\n");
403 write_extent_buffer(eb, garbage, name_ptr, len);
/*
 * Sanitize the names held by one leaf item inside the copied block dst.
 * A dummy extent buffer with src's start and length is allocated, dst is
 * copied into it, the item at slot is rewritten by the helper matching the
 * key type, and the result is copied back into dst.
 * NOTE(review): the switch header, the remaining parameter line and the
 * release of eb are not visible in this view.
 */
409 static void sanitize_name(struct metadump_struct *md, u8 *dst,
410 struct extent_buffer *src, struct btrfs_key *key,
413 struct extent_buffer *eb;
415 eb = alloc_dummy_eb(src->start, src->len);
417 fprintf(stderr, "Couldn't sanitize name, no memory\n");
421 memcpy(eb->data, dst, eb->len);
424 case BTRFS_DIR_ITEM_KEY:
425 case BTRFS_DIR_INDEX_KEY:
426 sanitize_dir_item(md, eb, slot);
428 case BTRFS_INODE_REF_KEY:
429 sanitize_inode_ref(md, eb, slot, 0);
431 case BTRFS_INODE_EXTREF_KEY:
432 sanitize_inode_ref(md, eb, slot, 1);
438 memcpy(dst, eb->data, eb->len);
443 * zero inline extents and csum items, and sanitize names when requested
/*
 * Walk every item of leaf src and scrub the matching bytes in the copy dst:
 *  - checksum items have their whole payload zeroed;
 *  - items that carry a name are passed to sanitize_name() when
 *    md->sanitize_names is set;
 *  - inline file extents have their inline data zeroed.
 * Offsets are computed from src and applied to dst.
 * NOTE(review): the continue statements that separate these cases are not
 * visible in this view.
 */
445 static void zero_items(struct metadump_struct *md, u8 *dst,
446 struct extent_buffer *src)
448 struct btrfs_file_extent_item *fi;
449 struct btrfs_item *item;
450 struct btrfs_key key;
451 u32 nritems = btrfs_header_nritems(src);
456 for (i = 0; i < nritems; i++) {
457 item = btrfs_item_nr(src, i);
458 btrfs_item_key_to_cpu(src, &key, i);
459 if (key.type == BTRFS_CSUM_ITEM_KEY) {
460 size = btrfs_item_size_nr(src, i);
461 memset(dst + btrfs_leaf_data(src) +
462 btrfs_item_offset_nr(src, i), 0, size);
466 if (md->sanitize_names && has_name(&key)) {
467 sanitize_name(md, dst, src, &key, i);
471 if (key.type != BTRFS_EXTENT_DATA_KEY)
474 fi = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
475 extent_type = btrfs_file_extent_type(src, fi);
476 if (extent_type != BTRFS_FILE_EXTENT_INLINE)
479 ptr = btrfs_file_extent_inline_start(fi);
480 size = btrfs_file_extent_inline_item_len(src, item);
481 memset(dst + ptr, 0, size);
486 * copy buffer and zero useless data in the buffer
/*
 * Copy src's data into dst, zero the parts that carry no tree structure and
 * re-checksum the copy. Three zeroing cases are visible:
 *  - everything after the btrfs_header;
 *  - for a leaf (level 0), the gap between the end of the item headers and
 *    the data of the last item, followed by zero_items();
 *  - otherwise, everything after the node's key pointers.
 * NOTE(review): the action taken for the super block offset and the condition
 * guarding the first case are not visible in this view.
 */
488 static void copy_buffer(struct metadump_struct *md, u8 *dst,
489 struct extent_buffer *src)
495 memcpy(dst, src->data, src->len);
496 if (src->start == BTRFS_SUPER_INFO_OFFSET)
499 level = btrfs_header_level(src);
500 nritems = btrfs_header_nritems(src);
503 size = sizeof(struct btrfs_header);
504 memset(dst + size, 0, src->len - size);
505 } else if (level == 0) {
506 size = btrfs_leaf_data(src) +
507 btrfs_item_offset_nr(src, nritems - 1) -
508 btrfs_item_nr_offset(nritems);
509 memset(dst + btrfs_item_nr_offset(nritems), 0, size);
510 zero_items(md, dst, src);
512 size = offsetof(struct btrfs_node, ptrs) +
513 sizeof(struct btrfs_key_ptr) * nritems;
514 memset(dst + size, 0, src->len - size);
516 csum_block(dst, src->len);
/*
 * Thread entry point for dump workers; data is the shared metadump_struct.
 * With md->mutex held, the worker waits on md->cond while md->list is empty,
 * then removes the first async_work from the list and drops the mutex.
 * When md->compress_level > 0 the work buffer is compressed with compress2()
 * into a newly allocated buffer of compressBound(async->size) bytes.
 * NOTE(review): the outer loop, the exit condition behind the unlock inside
 * the wait loop, error handling and the bookkeeping done under the final
 * lock/unlock pair are not visible in this view.
 */
519 static void *dump_worker(void *data)
521 struct metadump_struct *md = (struct metadump_struct *)data;
522 struct async_work *async;
526 pthread_mutex_lock(&md->mutex);
527 while (list_empty(&md->list)) {
529 pthread_mutex_unlock(&md->mutex);
532 pthread_cond_wait(&md->cond, &md->mutex);
534 async = list_entry(md->list.next, struct async_work, list);
535 list_del_init(&async->list);
536 pthread_mutex_unlock(&md->mutex);
538 if (md->compress_level > 0) {
539 u8 *orig = async->buffer;
541 async->bufsize = compressBound(async->size);
542 async->buffer = malloc(async->bufsize);
/* NOTE(review): the cast assumes bufsize is as wide as unsigned long -- confirm */
544 ret = compress2(async->buffer,
545 (unsigned long *)&async->bufsize,
546 orig, async->size, md->compress_level);
554 pthread_mutex_lock(&md->mutex);
556 pthread_mutex_unlock(&md->mutex);
/*
 * Initialise the header of md->cluster for a new cluster starting at start:
 * magic, start byte offset and an item count of zero are stored
 * little-endian, and compress records whether zlib is in use
 * (md->compress_level > 0).
 * NOTE(review): any reset of other md counters is not visible in this view.
 */
562 static void meta_cluster_init(struct metadump_struct *md, u64 start)
564 struct meta_cluster_header *header;
568 header = &md->cluster->header;
569 header->magic = cpu_to_le64(HEADER_MAGIC);
570 header->bytenr = cpu_to_le64(start);
571 header->nritems = cpu_to_le32(0);
572 header->compress = md->compress_level > 0 ?
573 COMPRESS_ZLIB : COMPRESS_NONE;
/*
 * Set up md for a dump run: zero it, create the condition variable and mutex,
 * initialise both lists, allocate the BLOCK_SIZE index block, start the first
 * cluster at offset 0 and spawn num_threads dump_worker threads.
 * crc32c_optimization_init() is called only when sanitize_names > 1, the mode
 * in which find_collision() is used.
 * Three error paths destroy the cond/mutex pair again; the last one also
 * wakes and joins the threads that were already created.
 * NOTE(review): the failure checks, return values and remaining parameter
 * line are not visible in this view.
 */
576 static int metadump_init(struct metadump_struct *md, struct btrfs_root *root,
577 FILE *out, int num_threads, int compress_level,
582 memset(md, 0, sizeof(*md));
583 pthread_cond_init(&md->cond, NULL);
584 pthread_mutex_init(&md->mutex, NULL);
585 INIT_LIST_HEAD(&md->list);
586 INIT_LIST_HEAD(&md->ordered);
/* (u64)-1 marks "no pending extent"; flush_pending() resets to the same value */
589 md->pending_start = (u64)-1;
590 md->compress_level = compress_level;
591 md->cluster = calloc(1, BLOCK_SIZE);
592 md->sanitize_names = sanitize_names;
593 if (sanitize_names > 1)
594 crc32c_optimization_init();
597 pthread_cond_destroy(&md->cond);
598 pthread_mutex_destroy(&md->mutex);
602 meta_cluster_init(md, 0);
606 md->name_tree.rb_node = NULL;
607 md->num_threads = num_threads;
608 md->threads = calloc(num_threads, sizeof(pthread_t));
611 pthread_cond_destroy(&md->cond);
612 pthread_mutex_destroy(&md->mutex);
616 for (i = 0; i < num_threads; i++) {
617 ret = pthread_create(md->threads + i, NULL, dump_worker, md);
623 pthread_mutex_lock(&md->mutex);
625 pthread_cond_broadcast(&md->cond);
626 pthread_mutex_unlock(&md->mutex);
/* join only the threads created before the failure, newest first */
628 for (i--; i >= 0; i--)
629 pthread_join(md->threads[i], NULL);
631 pthread_cond_destroy(&md->cond);
632 pthread_mutex_destroy(&md->mutex);
/*
 * Tear down a dump run: wake all workers under the mutex, join every thread,
 * destroy the cond/mutex pair, then empty md->name_tree by repeatedly erasing
 * its first node.
 * NOTE(review): the flag set before the broadcast and the freeing of each
 * name entry and of md's buffers are not visible in this view.
 */
640 static void metadump_destroy(struct metadump_struct *md)
645 pthread_mutex_lock(&md->mutex);
647 pthread_cond_broadcast(&md->cond);
648 pthread_mutex_unlock(&md->mutex);
650 for (i = 0; i < md->num_threads; i++)
651 pthread_join(md->threads[i], NULL);
653 pthread_cond_destroy(&md->cond);
654 pthread_mutex_destroy(&md->mutex);
656 while ((n = rb_first(&md->name_tree))) {
659 name = rb_entry(n, struct name, n);
660 rb_erase(n, &md->name_tree);
/*
 * Write size zero bytes to out as a single fwrite() item and return fwrite's
 * result. The zero source is a static BLOCK_SIZE byte array, so size must not
 * exceed BLOCK_SIZE; the caller in this file passes
 * BLOCK_SIZE - (bytenr & BLOCK_MASK).
 */
669 static int write_zero(FILE *out, size_t size)
671 static char zero[BLOCK_SIZE];
672 return fwrite(zero, size, 1, out);
/*
 * Write the current cluster to md->out: first the index block, then every
 * buffer on md->ordered in list order, then zero padding up to the next
 * BLOCK_SIZE boundary.
 * The wait loop releases and re-takes md->mutex around each sleep, so the
 * caller must hold md->mutex (flush_pending() does).
 * NOTE(review): the early return for an empty list, the update of *next, the
 * freeing of written buffers and the error bookkeeping are not visible in
 * this view.
 */
675 static int write_buffers(struct metadump_struct *md, u64 *next)
677 struct meta_cluster_header *header = &md->cluster->header;
678 struct meta_cluster_item *item;
679 struct async_work *async;
685 if (list_empty(&md->ordered))
688 /* wait until all buffers are compressed */
689 while (md->num_items > md->num_ready) {
690 struct timespec ts = {
694 pthread_mutex_unlock(&md->mutex);
695 nanosleep(&ts, NULL);
696 pthread_mutex_lock(&md->mutex);
699 /* setup and write index block */
700 list_for_each_entry(async, &md->ordered, ordered) {
701 item = md->cluster->items + nritems;
702 item->bytenr = cpu_to_le64(async->start);
703 item->size = cpu_to_le32(async->bufsize);
706 header->nritems = cpu_to_le32(nritems);
708 ret = fwrite(md->cluster, BLOCK_SIZE, 1, md->out);
710 fprintf(stderr, "Error writing out cluster: %d\n", errno);
/* track the image offset: cluster start + index block, then each buffer */
715 bytenr += le64_to_cpu(header->bytenr) + BLOCK_SIZE;
716 while (!list_empty(&md->ordered)) {
717 async = list_entry(md->ordered.next, struct async_work,
719 list_del_init(&async->ordered);
721 bytenr += async->bufsize;
723 ret = fwrite(async->buffer, async->bufsize, 1,
728 fprintf(stderr, "Error writing out cluster: %d\n",
736 /* zero unused space in the last block */
737 if (!err && bytenr & BLOCK_MASK) {
738 size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);
741 ret = write_zero(md->out, size);
743 fprintf(stderr, "Error zeroing out buffer: %d\n",
/*
 * Read the data extent described by async (async->start, async->size) from
 * disk into async->buffer. Each round maps the logical address with
 * btrfs_map_block(), takes the first stripe's device and physical offset, and
 * reads at most the remaining byte count with pread64(); a result shorter
 * than requested is reported as an error or a short read.
 * NOTE(review): the loop header, the updates of offset/logical/bytes_left and
 * the return values are not visible in this view.
 */
753 static int read_data_extent(struct metadump_struct *md,
754 struct async_work *async)
756 struct btrfs_multi_bio *multi = NULL;
757 struct btrfs_device *device;
758 u64 bytes_left = async->size;
759 u64 logical = async->start;
768 read_len = bytes_left;
769 ret = btrfs_map_block(&md->root->fs_info->mapping_tree, READ,
770 logical, &read_len, &multi, 0, NULL);
772 fprintf(stderr, "Couldn't map data block %d\n", ret);
776 device = multi->stripes[0].dev;
/*
 * NOTE(review): descriptor value 0 is treated as "not open" here, although 0
 * can be a valid descriptor -- confirm device->fd is never legitimately 0.
 */
778 if (device->fd == 0) {
780 "Device we need to read from is not open\n");
785 bytenr = multi->stripes[0].physical;
788 read_len = min(read_len, bytes_left);
789 done = pread64(fd, async->buffer+offset, read_len, bytenr);
790 if (done < read_len) {
792 fprintf(stderr, "Error reading extent %d\n",
795 fprintf(stderr, "Short read\n");
/*
 * Turn the pending extent (md->pending_start, md->pending_size) into an
 * async_work item and queue it; write the cluster out when it is full or when
 * done is set.
 * Data extents are filled via read_data_extent(); metadata is read block by
 * block with read_tree_block() and passed through copy_buffer(). The pending
 * range is then reset. Under md->mutex the item is appended to md->ordered
 * and, when compressing, also to md->list with a signal to the workers.
 * NOTE(review): the allocation failure paths, the loop bookkeeping and the
 * counter updates are not visible in this view.
 */
807 static int flush_pending(struct metadump_struct *md, int done)
809 struct async_work *async = NULL;
810 struct extent_buffer *eb;
811 u64 blocksize = md->root->nodesize;
817 if (md->pending_size) {
818 async = calloc(1, sizeof(*async));
822 async->start = md->pending_start;
823 async->size = md->pending_size;
824 async->bufsize = async->size;
825 async->buffer = malloc(async->bufsize);
826 if (!async->buffer) {
831 start = async->start;
835 ret = read_data_extent(md, async);
/* metadata path: skipped entirely when md->data is set */
843 while (!md->data && size > 0) {
844 u64 this_read = min(blocksize, size);
845 eb = read_tree_block(md->root, start, this_read, 0);
850 "Error reading metadata block\n");
853 copy_buffer(md, async->buffer + offset, eb);
854 free_extent_buffer(eb);
860 md->pending_start = (u64)-1;
861 md->pending_size = 0;
866 pthread_mutex_lock(&md->mutex);
868 list_add_tail(&async->ordered, &md->ordered);
870 if (md->compress_level > 0) {
871 list_add_tail(&async->list, &md->list);
872 pthread_cond_signal(&md->cond);
877 if (md->num_items >= ITEMS_PER_CLUSTER || done) {
878 ret = write_buffers(md, &start);
880 fprintf(stderr, "Error writing buffers %d\n",
/* begin the next cluster at the offset write_buffers() was given via &start */
883 meta_cluster_init(md, start);
885 pthread_mutex_unlock(&md->mutex);
/*
 * Add the range [start, start + size) to the pending extent.
 * The pending extent is flushed first when the data flag differs, when adding
 * size would exceed MAX_PENDING_SIZE, or when start does not directly follow
 * the pending range; pending_start is then set to start. A readahead is
 * issued for the range and pending_size grows by size.
 * NOTE(review): the remaining parameter line, the return statements and the
 * exact scope of some statements are not visible in this view.
 */
889 static int add_extent(u64 start, u64 size, struct metadump_struct *md,
893 if (md->data != data ||
894 md->pending_size + size > MAX_PENDING_SIZE ||
895 md->pending_start + md->pending_size != start) {
896 ret = flush_pending(md, 0);
899 md->pending_start = start;
901 readahead_tree_block(md->root, start, size, 0);
902 md->pending_size += size;
/*
 * Only built with BTRFS_COMPAT_EXTENT_TREE_V0.
 * Starting from path's current position, examine the items whose objectid is
 * bytenr, moving to the next leaf when the slot runs past the current one.
 * For BTRFS_EXTENT_REF_V0_KEY items the referencing objectid is read and
 * compared against BTRFS_FIRST_FREE_OBJECTID.
 * NOTE(review): the loop header, slot advance, return values and the closing
 * #endif are not visible in this view; presumably a ref objectid below
 * BTRFS_FIRST_FREE_OBJECTID means "tree block" -- confirm.
 */
907 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
908 static int is_tree_block(struct btrfs_root *extent_root,
909 struct btrfs_path *path, u64 bytenr)
911 struct extent_buffer *leaf;
912 struct btrfs_key key;
916 leaf = path->nodes[0];
918 struct btrfs_extent_ref_v0 *ref_item;
920 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
921 ret = btrfs_next_leaf(extent_root, path);
926 leaf = path->nodes[0];
928 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
929 if (key.objectid != bytenr)
931 if (key.type != BTRFS_EXTENT_REF_V0_KEY)
933 ref_item = btrfs_item_ptr(leaf, path->slots[0],
934 struct btrfs_extent_ref_v0);
935 ref_objectid = btrfs_ref_objectid_v0(leaf, ref_item);
936 if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID)
/*
 * Recursively add tree block eb and the blocks below it to the dump.
 * eb itself is added as a metadata extent of root->leafsize bytes. A level 0
 * block is only examined further when root_tree is set: its
 * BTRFS_ROOT_ITEM_KEY items name other tree roots, which are read and walked
 * with root_tree = 0. The other visible branch follows node block pointers
 * and recurses with the same root_tree value.
 * NOTE(review): the condition separating the two branches, the continue and
 * return statements are not visible in this view.
 */
944 static int copy_tree_blocks(struct btrfs_root *root, struct extent_buffer *eb,
945 struct metadump_struct *metadump, int root_tree)
947 struct extent_buffer *tmp;
948 struct btrfs_root_item *ri;
949 struct btrfs_key key;
956 ret = add_extent(btrfs_header_bytenr(eb), root->leafsize, metadump, 0);
958 fprintf(stderr, "Error adding metadata block\n");
962 if (btrfs_header_level(eb) == 0 && !root_tree)
965 level = btrfs_header_level(eb);
966 nritems = btrfs_header_nritems(eb);
967 for (i = 0; i < nritems; i++) {
969 btrfs_item_key_to_cpu(eb, &key, i);
970 if (key.type != BTRFS_ROOT_ITEM_KEY)
972 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
973 bytenr = btrfs_disk_root_bytenr(eb, ri);
974 tmp = read_tree_block(root, bytenr, root->leafsize, 0);
977 "Error reading log root block\n");
980 ret = copy_tree_blocks(root, tmp, metadump, 0);
981 free_extent_buffer(tmp);
985 bytenr = btrfs_node_blockptr(eb, i);
986 tmp = read_tree_block(root, bytenr, root->leafsize, 0);
988 fprintf(stderr, "Error reading log block\n");
991 ret = copy_tree_blocks(root, tmp, metadump, root_tree);
992 free_extent_buffer(tmp);
/*
 * Add the tree-log blocks to the dump by walking from the log root tree's
 * node with copy_tree_blocks(). An error is reported when the log root tree
 * or its node is missing.
 * NOTE(review): how blocknr (the super block's log root) is used, the
 * remaining call arguments and the return values are not visible in this view.
 */
1001 static int copy_log_trees(struct btrfs_root *root,
1002 struct metadump_struct *metadump,
1003 struct btrfs_path *path)
1005 u64 blocknr = btrfs_super_log_root(root->fs_info->super_copy);
1010 if (!root->fs_info->log_root_tree ||
1011 !root->fs_info->log_root_tree->node) {
1012 fprintf(stderr, "Error copying tree log, it wasn't setup\n");
1016 return copy_tree_blocks(root, root->fs_info->log_root_tree->node,
/*
 * Add the free space cache data extents to the dump.
 * The tree root is searched for BTRFS_EXTENT_DATA_KEY items; for each regular
 * (BTRFS_FILE_EXTENT_REG) file extent the on-disk byte range is added with
 * add_extent() as data (last argument 1). The path is released at the end.
 * NOTE(review): the rest of the search key, the loop header, slot advance and
 * return values are not visible in this view.
 */
1020 static int copy_space_cache(struct btrfs_root *root,
1021 struct metadump_struct *metadump,
1022 struct btrfs_path *path)
1024 struct extent_buffer *leaf;
1025 struct btrfs_file_extent_item *fi;
1026 struct btrfs_key key;
1027 u64 bytenr, num_bytes;
1030 root = root->fs_info->tree_root;
1033 key.type = BTRFS_EXTENT_DATA_KEY;
1036 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1038 fprintf(stderr, "Error searching for free space inode %d\n",
1044 leaf = path->nodes[0];
1045 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1046 ret = btrfs_next_leaf(root, path);
1048 fprintf(stderr, "Error going to next leaf "
1054 leaf = path->nodes[0];
1057 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1058 if (key.type != BTRFS_EXTENT_DATA_KEY) {
1063 fi = btrfs_item_ptr(leaf, path->slots[0],
1064 struct btrfs_file_extent_item);
1065 if (btrfs_file_extent_type(leaf, fi) !=
1066 BTRFS_FILE_EXTENT_REG) {
1071 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1072 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
1073 ret = add_extent(bytenr, num_bytes, metadump, 1);
1075 fprintf(stderr, "Error adding space cache blocks %d\n",
1077 btrfs_release_path(root, path);
/*
 * Find tree blocks through the extent tree and add them to the dump.
 * The search starts just past the super block (BTRFS_SUPER_INFO_OFFSET +
 * 4096). Items are skipped when their objectid is below the current bytenr or
 * their type is neither EXTENT_ITEM nor METADATA_ITEM. Items larger than a
 * bare btrfs_extent_item are added when flagged
 * BTRFS_EXTENT_FLAG_TREE_BLOCK; smaller ones are handled by the V0 path when
 * built with BTRFS_COMPAT_EXTENT_TREE_V0 and reported as an error otherwise.
 * The path is released at the end.
 * NOTE(review): the loop header, slot advance, #else/#endif lines and return
 * values are not visible in this view.
 */
1086 static int copy_from_extent_tree(struct metadump_struct *metadump,
1087 struct btrfs_path *path)
1089 struct btrfs_root *extent_root;
1090 struct extent_buffer *leaf;
1091 struct btrfs_extent_item *ei;
1092 struct btrfs_key key;
1097 extent_root = metadump->root->fs_info->extent_root;
1098 bytenr = BTRFS_SUPER_INFO_OFFSET + 4096;
1099 key.objectid = bytenr;
1100 key.type = BTRFS_EXTENT_ITEM_KEY;
1103 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1105 fprintf(stderr, "Error searching extent root %d\n", ret);
1111 leaf = path->nodes[0];
1112 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1113 ret = btrfs_next_leaf(extent_root, path);
1115 fprintf(stderr, "Error going to next leaf %d"
1123 leaf = path->nodes[0];
1126 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1127 if (key.objectid < bytenr ||
1128 (key.type != BTRFS_EXTENT_ITEM_KEY &&
1129 key.type != BTRFS_METADATA_ITEM_KEY)) {
1134 bytenr = key.objectid;
/* visible size choices: key.offset for METADATA_ITEM keys, leafsize as the other */
1135 if (key.type == BTRFS_METADATA_ITEM_KEY)
1136 num_bytes = key.offset;
1138 num_bytes = extent_root->leafsize;
1140 if (btrfs_item_size_nr(leaf, path->slots[0]) > sizeof(*ei)) {
1141 ei = btrfs_item_ptr(leaf, path->slots[0],
1142 struct btrfs_extent_item);
1143 if (btrfs_extent_flags(leaf, ei) &
1144 BTRFS_EXTENT_FLAG_TREE_BLOCK) {
1145 ret = add_extent(bytenr, num_bytes, metadump,
1148 fprintf(stderr, "Error adding block "
1154 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1155 ret = is_tree_block(extent_root, path, bytenr);
1157 fprintf(stderr, "Error checking tree block "
1163 ret = add_extent(bytenr, num_bytes, metadump,
1166 fprintf(stderr, "Error adding block "
1173 fprintf(stderr, "Either extent tree corruption or "
1174 "you haven't built with V0 support\n");
1179 bytenr += num_bytes;
1182 btrfs_release_path(extent_root, path);
/*
 * Create a metadata image of the filesystem at input and write it to out.
 * Visible steps: open the filesystem, initialise the metadump state, add the
 * super block (4096 bytes at BTRFS_SUPER_INFO_OFFSET), collect tree blocks,
 * copy the log trees and the space cache, flush the remaining pending data
 * with done = 1, tear everything down and close the filesystem.
 * Tree blocks are collected either by walking the chunk and root trees with
 * copy_tree_blocks() or via copy_from_extent_tree(); presumably walk_trees
 * selects between them -- the selecting condition is not visible here.
 * Returns err when it is set, otherwise the result of close_ctree().
 * NOTE(review): the error checks and goto targets are not visible in this view.
 */
1187 static int create_metadump(const char *input, FILE *out, int num_threads,
1188 int compress_level, int sanitize, int walk_trees)
1190 struct btrfs_root *root;
1191 struct btrfs_path *path = NULL;
1192 struct metadump_struct metadump;
1196 root = open_ctree(input, 0, 0);
1198 fprintf(stderr, "Open ctree failed\n");
/* the dump code uses nodesize and leafsize interchangeably */
1202 BUG_ON(root->nodesize != root->leafsize);
1204 ret = metadump_init(&metadump, root, out, num_threads,
1205 compress_level, sanitize);
1207 fprintf(stderr, "Error initing metadump %d\n", ret);
1212 ret = add_extent(BTRFS_SUPER_INFO_OFFSET, 4096, &metadump, 0);
1214 fprintf(stderr, "Error adding metadata %d\n", ret);
1219 path = btrfs_alloc_path();
1221 fprintf(stderr, "Out of memory allocing path\n");
1227 ret = copy_tree_blocks(root, root->fs_info->chunk_root->node,
1234 ret = copy_tree_blocks(root, root->fs_info->tree_root->node,
1241 ret = copy_from_extent_tree(&metadump, path);
1248 ret = copy_log_trees(root, &metadump, path);
1254 ret = copy_space_cache(root, &metadump, path);
1256 ret = flush_pending(&metadump, 1);
1260 fprintf(stderr, "Error flushing pending %d\n", ret);
1263 metadump_destroy(&metadump);
1265 btrfs_free_path(path);
1266 ret = close_ctree(root);
1267 return err ? err : ret;
/*
 * Old-style super block fixup for restore (used when old_restore is set).
 * Sets BTRFS_SUPER_FLAG_METADUMP and replaces the start of sys_chunk_array
 * with one key/chunk pair: a SYSTEM chunk at offset 0 with length (u64)-1 and
 * a single stripe at device offset 0 on the super block's own device. The
 * array size is set to exactly that pair and the 4096 byte block is
 * re-checksummed.
 */
1270 static void update_super_old(u8 *buffer)
1272 struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
1273 struct btrfs_chunk *chunk;
1274 struct btrfs_disk_key *key;
1275 u32 sectorsize = btrfs_super_sectorsize(super);
1276 u64 flags = btrfs_super_flags(super);
1278 flags |= BTRFS_SUPER_FLAG_METADUMP;
1279 btrfs_set_super_flags(super, flags);
/* the chunk is laid out directly after the disk key in the array */
1281 key = (struct btrfs_disk_key *)(super->sys_chunk_array);
1282 chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
1283 sizeof(struct btrfs_disk_key));
1285 btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
1286 btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
1287 btrfs_set_disk_key_offset(key, 0);
1289 btrfs_set_stack_chunk_length(chunk, (u64)-1);
1290 btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
1291 btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024);
1292 btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
1293 btrfs_set_stack_chunk_io_align(chunk, sectorsize);
1294 btrfs_set_stack_chunk_io_width(chunk, sectorsize);
1295 btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
1296 btrfs_set_stack_chunk_num_stripes(chunk, 1);
1297 btrfs_set_stack_chunk_sub_stripes(chunk, 0);
1298 chunk->stripe.devid = super->dev_item.devid;
1299 chunk->stripe.offset = cpu_to_le64(0);
1300 memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
1301 btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
1302 csum_block(buffer, 4096);
/*
 * Rewrite the super block's sys_chunk_array in place so that every chunk has
 * a single stripe on the super block's own device.
 * ptr reads the old array and write_ptr writes the compacted one over the
 * same memory, hence memmove. Each kept chunk becomes a SYSTEM chunk with one
 * stripe whose device offset equals the key's offset. The read side advances
 * by the old item size (based on the old stripe count), the write side by
 * sizeof(*chunk). Finally the new array size is stored and the 4096 byte
 * block re-checksummed.
 * NOTE(review): the else line before the "Bogus key" message and the return
 * statements are not visible in this view.
 */
1305 static int update_super(u8 *buffer)
1307 struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
1308 struct btrfs_chunk *chunk;
1309 struct btrfs_disk_key *disk_key;
1310 struct btrfs_key key;
1311 u32 new_array_size = 0;
1315 u8 *ptr, *write_ptr;
1316 int old_num_stripes;
1318 write_ptr = ptr = super->sys_chunk_array;
1319 array_size = btrfs_super_sys_array_size(super);
1321 while (cur < array_size) {
1322 disk_key = (struct btrfs_disk_key *)ptr;
1323 btrfs_disk_key_to_cpu(&key, disk_key);
1325 new_array_size += sizeof(*disk_key);
1326 memmove(write_ptr, ptr, sizeof(*disk_key));
1328 write_ptr += sizeof(*disk_key);
1329 ptr += sizeof(*disk_key);
1330 cur += sizeof(*disk_key);
1331 new_cur += sizeof(*disk_key);
1333 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
/* read the old stripe count before the chunk is overwritten at write_ptr */
1334 chunk = (struct btrfs_chunk *)ptr;
1335 old_num_stripes = btrfs_stack_chunk_num_stripes(chunk);
1336 chunk = (struct btrfs_chunk *)write_ptr;
1338 memmove(write_ptr, ptr, sizeof(*chunk));
1339 btrfs_set_stack_chunk_num_stripes(chunk, 1);
1340 btrfs_set_stack_chunk_sub_stripes(chunk, 0);
1341 btrfs_set_stack_chunk_type(chunk,
1342 BTRFS_BLOCK_GROUP_SYSTEM);
1343 chunk->stripe.devid = super->dev_item.devid;
1344 chunk->stripe.offset = cpu_to_le64(key.offset);
1345 memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid,
1347 new_array_size += sizeof(*chunk);
1348 new_cur += sizeof(*chunk);
1350 fprintf(stderr, "Bogus key in the sys chunk array "
1354 write_ptr += sizeof(*chunk);
1355 ptr += btrfs_chunk_item_size(old_num_stripes);
1356 cur += btrfs_chunk_item_size(old_num_stripes);
1359 btrfs_set_super_sys_array_size(super, new_array_size);
1360 csum_block(buffer, 4096);
/*
 * Allocate a zero-filled extent_buffer with size bytes of data space directly
 * after the struct.
 * NOTE(review): the allocation check, the field setup from bytenr/size and
 * the return are not visible in this view; callers read ->len and ->data and
 * treat a NULL result as out of memory.
 */
1365 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size)
1367 struct extent_buffer *eb;
1369 eb = malloc(sizeof(struct extent_buffer) + size);
1372 memset(eb, 0, sizeof(struct extent_buffer) + size);
/*
 * Resize the item at slot in leaf eb to new_size bytes, keeping the first
 * new_size bytes of its data.
 * With size_diff = old_size - new_size, the data offsets of the items from
 * slot to the last one are raised by size_diff, the data between the last
 * item's start (data_end) and the kept part of this item is moved up by
 * size_diff, and the item's size field is updated.
 * NOTE(review): the action for old_size == new_size is not visible in this
 * view; the only caller shown shrinks chunk items to sizeof(struct btrfs_chunk).
 */
1379 static void truncate_item(struct extent_buffer *eb, int slot, u32 new_size)
1381 struct btrfs_item *item;
1389 old_size = btrfs_item_size_nr(eb, slot);
1390 if (old_size == new_size)
1393 nritems = btrfs_header_nritems(eb);
1394 data_end = btrfs_item_offset_nr(eb, nritems - 1);
1396 old_data_start = btrfs_item_offset_nr(eb, slot);
1397 size_diff = old_size - new_size;
1399 for (i = slot; i < nritems; i++) {
1401 item = btrfs_item_nr(eb, i);
1402 ioff = btrfs_item_offset(eb, item);
1403 btrfs_set_item_offset(eb, item, ioff + size_diff);
1406 memmove_extent_buffer(eb, btrfs_leaf_data(eb) + data_end + size_diff,
1407 btrfs_leaf_data(eb) + data_end,
1408 old_data_start + new_size - data_end);
1409 item = btrfs_item_nr(eb, slot);
1410 btrfs_set_item_size(eb, item, new_size);
/*
 * Rewrite chunk tree leaves found in a restored buffer so every chunk item
 * describes a single stripe on the restore device.
 * The buffer is processed in mdres->leafsize steps starting at async->start.
 * Each block is copied into a dummy extent buffer and checked for matching
 * bytenr, fsid, chunk tree owner and level 0. Every BTRFS_CHUNK_ITEM_KEY item
 * is then truncated to sizeof(struct btrfs_chunk), stripped of its RAID
 * profile bits and given one stripe with mdres->devid, mdres->uuid and a
 * device offset equal to the key's offset. The block is copied back into
 * buffer and re-checksummed.
 * NOTE(review): the loop header, the actions taken when a check fails, the
 * release of eb and the return values are not visible in this view.
 */
1413 static int fixup_chunk_tree_block(struct mdrestore_struct *mdres,
1414 struct async_work *async, u8 *buffer,
1417 struct extent_buffer *eb;
1418 size_t size_left = size;
1419 u64 bytenr = async->start;
1422 if (size_left % mdres->leafsize)
1425 eb = alloc_dummy_eb(bytenr, mdres->leafsize);
1431 memcpy(eb->data, buffer, mdres->leafsize);
1433 if (btrfs_header_bytenr(eb) != bytenr)
1435 if (memcmp(mdres->fsid,
1436 eb->data + offsetof(struct btrfs_header, fsid),
1440 if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID)
1443 if (btrfs_header_level(eb) != 0)
1446 for (i = 0; i < btrfs_header_nritems(eb); i++) {
1447 struct btrfs_chunk chunk;
1448 struct btrfs_key key;
1451 btrfs_item_key_to_cpu(eb, &key, i);
1452 if (key.type != BTRFS_CHUNK_ITEM_KEY)
1454 truncate_item(eb, i, sizeof(chunk));
1455 read_extent_buffer(eb, &chunk,
1456 btrfs_item_ptr_offset(eb, i),
1459 /* Zero out the RAID profile */
1460 type = btrfs_stack_chunk_type(&chunk);
1461 type &= (BTRFS_BLOCK_GROUP_DATA |
1462 BTRFS_BLOCK_GROUP_SYSTEM |
1463 BTRFS_BLOCK_GROUP_METADATA);
1464 btrfs_set_stack_chunk_type(&chunk, type);
1466 btrfs_set_stack_chunk_num_stripes(&chunk, 1);
1467 btrfs_set_stack_chunk_sub_stripes(&chunk, 0);
1468 btrfs_set_stack_stripe_devid(&chunk.stripe, mdres->devid);
1469 btrfs_set_stack_stripe_offset(&chunk.stripe, key.offset);
1470 memcpy(chunk.stripe.dev_uuid, mdres->uuid,
1472 write_extent_buffer(eb, &chunk,
1473 btrfs_item_ptr_offset(eb, i),
1476 memcpy(buffer, eb->data, eb->len);
1477 csum_block(buffer, eb->len);
1479 size_left -= mdres->leafsize;
1480 buffer += mdres->leafsize;
1481 bytenr += mdres->leafsize;
/*
 * Write the 4096 byte super block in buf to the backup super block locations
 * of fd. The target is stat'ed to obtain its size; for each mirror index from
 * 1 up to BTRFS_SUPER_MIRROR_MAX - 1 the offset from btrfs_sb_offset() is
 * checked against that size before pwrite64() is issued. Write problems are
 * reported on stderr.
 * NOTE(review): the actions after a failed fstat and for a mirror that does
 * not fit are not visible in this view.
 */
1487 static void write_backup_supers(int fd, u8 *buf)
1495 if (fstat(fd, &st)) {
1496 fprintf(stderr, "Couldn't stat restore point, won't be able "
1497 "to write backup supers: %d\n", errno);
1501 size = btrfs_device_size(fd, &st);
1503 for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1504 bytenr = btrfs_sb_offset(i);
1505 if (bytenr + 4096 > size)
1507 ret = pwrite64(fd, buf, 4096, bytenr);
1510 fprintf(stderr, "Problem writing out backup "
1511 "super block %d, err %d\n", i, errno);
1513 fprintf(stderr, "Short write writing out "
1514 "backup super block\n");
/*
 * Thread entry point for restore workers; data is the shared mdrestore_struct.
 * A scratch buffer of MAX_PENDING_SIZE * 2 bytes is allocated for
 * decompression; on failure -ENOMEM is recorded in mdres->error under the
 * mutex. The worker waits on mdres->cond until mdres->leafsize is known and
 * work is queued, then dequeues one item. zlib-compressed items are
 * uncompressed into the scratch buffer, others are used as they are. The
 * super block is fixed up with update_super_old() or update_super(); for
 * other blocks in the new restore mode fixup_chunk_tree_block() is applied.
 * The result is written at async->start with pwrite64(), and the super block
 * is additionally written to its backup locations.
 * NOTE(review): the outer loop, exit condition, error assignments and counter
 * updates are not visible in this view.
 */
1520 static void *restore_worker(void *data)
1522 struct mdrestore_struct *mdres = (struct mdrestore_struct *)data;
1523 struct async_work *async;
1530 outfd = fileno(mdres->out);
1531 buffer = malloc(MAX_PENDING_SIZE * 2);
1533 fprintf(stderr, "Error allocing buffer\n");
1534 pthread_mutex_lock(&mdres->mutex);
1536 mdres->error = -ENOMEM;
1537 pthread_mutex_unlock(&mdres->mutex);
1544 pthread_mutex_lock(&mdres->mutex);
1545 while (!mdres->leafsize || list_empty(&mdres->list)) {
1547 pthread_mutex_unlock(&mdres->mutex);
1550 pthread_cond_wait(&mdres->cond, &mdres->mutex);
1552 async = list_entry(mdres->list.next, struct async_work, list);
1553 list_del_init(&async->list);
1554 pthread_mutex_unlock(&mdres->mutex);
1556 if (mdres->compress_method == COMPRESS_ZLIB) {
1557 size = MAX_PENDING_SIZE * 2;
/* NOTE(review): the cast assumes size is as wide as unsigned long -- confirm */
1558 ret = uncompress(buffer, (unsigned long *)&size,
1559 async->buffer, async->bufsize);
1561 fprintf(stderr, "Error decompressing %d\n",
1567 outbuf = async->buffer;
1568 size = async->bufsize;
1571 if (async->start == BTRFS_SUPER_INFO_OFFSET) {
1572 if (mdres->old_restore) {
1573 update_super_old(outbuf);
1575 ret = update_super(outbuf);
1579 } else if (!mdres->old_restore) {
1580 ret = fixup_chunk_tree_block(mdres, async, outbuf, size);
1585 ret = pwrite64(outfd, outbuf, size, async->start);
1588 fprintf(stderr, "Error writing to device %d\n",
1592 fprintf(stderr, "Short write\n");
1597 if (async->start == BTRFS_SUPER_INFO_OFFSET)
1598 write_backup_supers(outfd, outbuf);
/* only the first error is kept in mdres->error */
1600 pthread_mutex_lock(&mdres->mutex);
1601 if (err && !mdres->error)
1604 pthread_mutex_unlock(&mdres->mutex);
1606 free(async->buffer);
/*
 * Tear down a restore run: wake all workers under the mutex, join every
 * thread, destroy the cond/mutex pair and free the thread array.
 * NOTE(review): the flag set before the broadcast is not visible in this view.
 */
1614 static void mdrestore_destroy(struct mdrestore_struct *mdres)
1617 pthread_mutex_lock(&mdres->mutex);
1619 pthread_cond_broadcast(&mdres->cond);
1620 pthread_mutex_unlock(&mdres->mutex);
1622 for (i = 0; i < mdres->num_threads; i++)
1623 pthread_join(mdres->threads[i], NULL);
1625 pthread_cond_destroy(&mdres->cond);
1626 pthread_mutex_destroy(&mdres->mutex);
1627 free(mdres->threads);
/*
 * Set up mdres for a restore run: zero it, create the condition variable and
 * mutex, initialise the work list, record old_restore and spawn num_threads
 * restore_worker threads. mdrestore_destroy() is called on the visible
 * failure path.
 * NOTE(review): the remaining parameter line, the failure checks and the
 * return values are not visible in this view.
 */
1630 static int mdrestore_init(struct mdrestore_struct *mdres,
1631 FILE *in, FILE *out, int old_restore,
1636 memset(mdres, 0, sizeof(*mdres));
1637 pthread_cond_init(&mdres->cond, NULL);
1638 pthread_mutex_init(&mdres->mutex, NULL);
1639 INIT_LIST_HEAD(&mdres->list);
1642 mdres->old_restore = old_restore;
1647 mdres->num_threads = num_threads;
1648 mdres->threads = calloc(num_threads, sizeof(pthread_t));
1649 if (!mdres->threads)
1651 for (i = 0; i < num_threads; i++) {
1652 ret = pthread_create(mdres->threads + i, NULL, restore_worker,
1658 mdrestore_destroy(mdres);
/*
 * Extract restore parameters from the super block carried by async:
 * leafsize, fsid, device uuid and device id are copied into mdres.
 * With zlib compression the block is first uncompressed into a temporary
 * MAX_PENDING_SIZE * 2 byte buffer; otherwise async->buffer is read directly.
 * add_cluster() calls this with mdres->mutex held.
 * NOTE(review): the allocation check, the release of the temporary buffer and
 * the return values are not visible in this view.
 */
1662 static int fill_mdres_info(struct mdrestore_struct *mdres,
1663 struct async_work *async)
1665 struct btrfs_super_block *super;
1670 if (mdres->compress_method == COMPRESS_ZLIB) {
1671 size_t size = MAX_PENDING_SIZE * 2;
1673 buffer = malloc(MAX_PENDING_SIZE * 2);
1676 ret = uncompress(buffer, (unsigned long *)&size,
1677 async->buffer, async->bufsize);
1679 fprintf(stderr, "Error decompressing %d\n", ret);
1685 outbuf = async->buffer;
1688 super = (struct btrfs_super_block *)outbuf;
1689 mdres->leafsize = btrfs_super_leafsize(super);
1690 memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
1691 memcpy(mdres->uuid, super->dev_item.uuid,
1693 mdres->devid = le64_to_cpu(super->dev_item.devid);
/*
 * Read the buffers of one cluster from mdres->in and queue them for the
 * restore workers.
 * The cluster's compression method is taken from its header. For every index
 * item an async_work is allocated, its buffer is read from the image and the
 * running image offset advances by the buffer size. Under mdres->mutex the
 * super block item is first passed to fill_mdres_info(); the item is then
 * appended to mdres->list and a worker is signalled. Afterwards the padding
 * up to the next BLOCK_SIZE boundary is read and discarded.
 * NOTE(review): the counter updates, some error paths, the update of *next
 * and the return values are not visible in this view.
 */
1698 static int add_cluster(struct meta_cluster *cluster,
1699 struct mdrestore_struct *mdres, u64 *next)
1701 struct meta_cluster_item *item;
1702 struct meta_cluster_header *header = &cluster->header;
1703 struct async_work *async;
/* a new cluster may only be added once the previous one has been fully processed */
1708 BUG_ON(mdres->num_items);
1709 mdres->compress_method = header->compress;
1711 bytenr = le64_to_cpu(header->bytenr) + BLOCK_SIZE;
1712 nritems = le32_to_cpu(header->nritems);
1713 for (i = 0; i < nritems; i++) {
1714 item = &cluster->items[i];
1715 async = calloc(1, sizeof(*async));
1717 fprintf(stderr, "Error allocating async\n");
1720 async->start = le64_to_cpu(item->bytenr);
1721 async->bufsize = le32_to_cpu(item->size);
1722 async->buffer = malloc(async->bufsize);
1723 if (!async->buffer) {
1724 fprintf(stderr, "Error allocing async buffer\n");
1728 ret = fread(async->buffer, async->bufsize, 1, mdres->in);
1730 fprintf(stderr, "Error reading buffer %d\n", errno);
1731 free(async->buffer);
1735 bytenr += async->bufsize;
1737 pthread_mutex_lock(&mdres->mutex);
1738 if (async->start == BTRFS_SUPER_INFO_OFFSET) {
1739 ret = fill_mdres_info(mdres, async);
1741 fprintf(stderr, "Error setting up restore\n");
1742 pthread_mutex_unlock(&mdres->mutex);
1743 free(async->buffer);
1748 list_add_tail(&async->list, &mdres->list);
1750 pthread_cond_signal(&mdres->cond);
1751 pthread_mutex_unlock(&mdres->mutex);
1753 if (bytenr & BLOCK_MASK) {
/* size is at most BLOCK_SIZE - 1 == BLOCK_MASK here, so the buffer is large enough */
1754 char buffer[BLOCK_MASK];
1755 size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);
1758 ret = fread(buffer, size, 1, mdres->in);
1760 fprintf(stderr, "Error reading in buffer %d\n", errno);
/*
 * Poll until the workers have processed all queued items
 * (mdres->num_items reaches 0) or ret becomes non-zero. The mutex is dropped
 * around each sleep so the workers can make progress.
 * NOTE(review): the assignments to ret (presumably from mdres->error), the
 * tv_sec initialiser and the return are not visible in this view.
 */
1768 static int wait_for_worker(struct mdrestore_struct *mdres)
1772 pthread_mutex_lock(&mdres->mutex);
1774 while (!ret && mdres->num_items > 0) {
1775 struct timespec ts = {
/* 10000000 ns = 10 ms */
1777 .tv_nsec = 10000000,
1779 pthread_mutex_unlock(&mdres->mutex);
1780 nanosleep(&ts, NULL);
1781 pthread_mutex_lock(&mdres->mutex);
1784 pthread_mutex_unlock(&mdres->mutex);
/*
 * Restore a metadump image from input onto out.
 * The input name "-" is special-cased; any other name is opened with
 * fopen(). A BLOCK_SIZE cluster buffer is allocated and the restore state
 * initialised. Cluster index blocks are then read one at a time; each header
 * must carry HEADER_MAGIC and the expected byte offset. A valid cluster is
 * handed to add_cluster() and the workers are awaited before continuing.
 * mdrestore_destroy() runs at the end.
 * NOTE(review): the remaining parameter line, the loop header, the stdin
 * handling, the cleanup of cluster/in and the return values are not visible
 * in this view.
 */
1788 static int restore_metadump(const char *input, FILE *out, int old_restore,
1791 struct meta_cluster *cluster = NULL;
1792 struct meta_cluster_header *header;
1793 struct mdrestore_struct mdrestore;
1798 if (!strcmp(input, "-")) {
1801 in = fopen(input, "r");
1803 perror("unable to open metadump image");
1808 cluster = malloc(BLOCK_SIZE);
1810 fprintf(stderr, "Error allocating cluster\n");
1816 ret = mdrestore_init(&mdrestore, in, out, old_restore, num_threads);
1818 fprintf(stderr, "Error initing mdrestore %d\n", ret);
1826 ret = fread(cluster, BLOCK_SIZE, 1, in);
1830 header = &cluster->header;
1831 if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
1832 le64_to_cpu(header->bytenr) != bytenr) {
1833 fprintf(stderr, "bad header in metadump image\n");
1837 ret = add_cluster(cluster, &mdrestore, &bytenr);
1839 fprintf(stderr, "Error adding cluster\n");
1843 ret = wait_for_worker(&mdrestore);
1845 fprintf(stderr, "One of the threads errored out %d\n",
1851 mdrestore_destroy(&mdrestore);
/*
 * Print the command line help to stderr. The options listed match the
 * getopt string "rc:t:osw" used in main().
 * NOTE(review): whatever follows the last message (e.g. an exit) is not
 * visible in this view.
 */
1858 static void print_usage(void)
1860 fprintf(stderr, "usage: btrfs-image [options] source target\n");
1861 fprintf(stderr, "\t-r \trestore metadump image\n");
1862 fprintf(stderr, "\t-c value\tcompression level (0 ~ 9)\n");
1863 fprintf(stderr, "\t-t value\tnumber of threads (1 ~ 32)\n");
1864 fprintf(stderr, "\t-o \tdon't mess with the chunk tree when restoring\n");
/*
 * NOTE(review): unlike the other messages this one has no trailing \n, so the
 * -w line below is printed on the same output line.
 */
1865 fprintf(stderr, "\t-s \tsanitize file names, use once to just use garbage, use twice if you want crc collisions");
1866 fprintf(stderr, "\t-w \twalk all trees instead of using extent tree, do this if your extent tree is broken\n");
/*
 * Entry point: parse the options (getopt string "rc:t:osw"), validate the
 * thread count (1..32) and compression level (0..9), take source and target
 * from the remaining arguments, open the target and run either
 * create_metadump() or restore_metadump().
 * A target of "-" is special-cased when creating; otherwise the target is
 * opened with "w+". When compressing without an explicit thread count, the
 * number of online processors is used. Restore always runs with one thread.
 * NOTE(review): the option switch, the usage calls on invalid input, the
 * stdout handling and the exit path are not visible in this view.
 */
1870 int main(int argc, char *argv[])
1874 int num_threads = 0;
1875 int compress_level = 0;
1877 int old_restore = 0;
1884 int c = getopt(argc, argv, "rc:t:osw");
1892 num_threads = atoi(optarg);
1893 if (num_threads <= 0 || num_threads > 32)
1897 compress_level = atoi(optarg);
1898 if (compress_level < 0 || compress_level > 9)
/* -o only makes sense for restore */
1915 if (old_restore && create)
1918 argc = argc - optind;
1921 source = argv[optind];
1922 target = argv[optind + 1];
1924 if (create && !strcmp(target, "-")) {
1927 out = fopen(target, "w+");
1929 perror("unable to create target file");
1934 if (num_threads == 0 && compress_level > 0) {
1935 num_threads = sysconf(_SC_NPROCESSORS_ONLN);
1936 if (num_threads <= 0)
1941 ret = create_metadump(source, out, num_threads,
1942 compress_level, sanitize, walk_trees);
1944 ret = restore_metadump(source, out, old_restore, 1);