Btrfs-progs: move btrfs_fsck_reinit_root to cmds-check.c
[platform/upstream/btrfs-progs.git] / btrfs-image.c
1 /*
2  * Copyright (C) 2008 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #define _XOPEN_SOURCE 500
20 #define _GNU_SOURCE 1
21 #include <pthread.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <sys/types.h>
25 #include <sys/stat.h>
26 #include <fcntl.h>
27 #include <unistd.h>
28 #include <dirent.h>
29 #include <zlib.h>
30 #include "kerncompat.h"
31 #include "crc32c.h"
32 #include "ctree.h"
33 #include "disk-io.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "version.h"
37 #include "volumes.h"
38 #include "extent_io.h"
39
#define HEADER_MAGIC            0xbd5c25e27295668bULL	/* identifies a metadump cluster header */
#define MAX_PENDING_SIZE        (256 * 1024)	/* max bytes batched before flush_pending() */
#define BLOCK_SIZE              1024		/* granularity of the image file layout */
#define BLOCK_MASK              (BLOCK_SIZE - 1)

/* values stored in meta_cluster_header.compress */
#define COMPRESS_NONE           0
#define COMPRESS_ZLIB           1
/* image-file index entry: one buffer stored in a cluster */
struct meta_cluster_item {
	__le64 bytenr;		/* logical start of the extent run held in the buffer */
	__le32 size;		/* bytes the (possibly compressed) buffer occupies */
} __attribute__ ((__packed__));
52
/* header at the front of every BLOCK_SIZE cluster in the image file */
struct meta_cluster_header {
	__le64 magic;		/* HEADER_MAGIC */
	__le64 bytenr;		/* image-file offset of this cluster */
	__le32 nritems;		/* number of meta_cluster_item entries in use */
	u8 compress;		/* COMPRESS_NONE or COMPRESS_ZLIB */
} __attribute__ ((__packed__));
59
/* cluster header + index items + buffers */
struct meta_cluster {
	struct meta_cluster_header header;
	struct meta_cluster_item items[];	/* up to ITEMS_PER_CLUSTER entries */
} __attribute__ ((__packed__));
65
/* how many index items fit in the rest of a BLOCK_SIZE cluster block */
#define ITEMS_PER_CLUSTER ((BLOCK_SIZE - sizeof(struct meta_cluster)) / \
                           sizeof(struct meta_cluster_item))
68
/* one chunk mapping, kept in mdrestore_struct.chunk_tree (keyed by logical) */
struct fs_chunk {
	u64 logical;	/* chunk start, logical address (tree key; see chunk_cmp) */
	u64 physical;	/* presumably the matching physical offset — restore side not visible here */
	u64 bytes;	/* chunk length; chunk_cmp's fuzzy mode matches inside [logical, logical+bytes) */
	struct rb_node n;
};
75
/* one batched extent run travelling through the dump pipeline */
struct async_work {
	struct list_head list;		/* queued on md->list for the compress workers */
	struct list_head ordered;	/* on md->ordered until written out in order */
	u64 start;			/* logical start of the run */
	u64 size;			/* uncompressed size in bytes */
	u8 *buffer;			/* data; dump_worker replaces it with a compressed copy */
	size_t bufsize;			/* current length of buffer */
	int error;			/* set by dump_worker when compression fails */
};
85
/* state for one metadump-create operation */
struct metadump_struct {
	struct btrfs_root *root;	/* filesystem being dumped */
	FILE *out;			/* image output stream */

	struct meta_cluster *cluster;	/* BLOCK_SIZE buffer for the cluster being built */

	pthread_t *threads;		/* compression worker pool */
	size_t num_threads;
	pthread_mutex_t mutex;		/* guards list, ordered, num_items, num_ready, done */
	pthread_cond_t cond;		/* signals workers when list gains work or done is set */
	struct rb_root name_tree;	/* original name -> substitute (see find_collision) */

	struct list_head list;		/* async_work queued for compression */
	struct list_head ordered;	/* async_work awaiting write-out, in output order */
	size_t num_items;		/* items added to the current cluster */
	size_t num_ready;		/* of those, how many are compressed/ready */

	u64 pending_start;		/* logical start of the extent run being batched */
	u64 pending_size;		/* bytes batched so far (flushed at MAX_PENDING_SIZE) */

	int compress_level;		/* zlib level; > 0 enables COMPRESS_ZLIB */
	int done;			/* tells workers to exit */
	int data;			/* current batch is data extents, not metadata */
	int sanitize_names;		/* 1 = random garbage names, > 1 = crc32c-colliding names */
};
111
/* entry of metadump_struct.name_tree: original name and its substitute */
struct name {
	struct rb_node n;
	char *val;	/* original name (not NUL-terminated) */
	char *sub;	/* substitute of the same length (and same crc32c when found) */
	u32 len;	/* length of both val and sub */
};
118
/* state for one metadump-restore operation (restore code mostly outside this view) */
struct mdrestore_struct {
	FILE *in;			/* image being read */
	FILE *out;			/* target device/file being written */

	pthread_t *threads;		/* worker pool */
	size_t num_threads;
	pthread_mutex_t mutex;		/* NOTE(review): presumably guards list/num_items as on the dump side */
	pthread_cond_t cond;

	struct rb_root chunk_tree;	/* fs_chunk entries, logical -> physical */
	struct list_head list;
	size_t num_items;
	u64 leafsize;
	u64 devid;
	u8 uuid[BTRFS_UUID_SIZE];
	u8 fsid[BTRFS_FSID_SIZE];

	int compress_method;		/* compress value read from the cluster headers */
	int done;
	int error;
	int old_restore;		/* restore images made by older btrfs-image — confirm against callers */
	int fixup_offset;
	int multi_devices;
	struct btrfs_fs_info *info;
};
144
145 static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
146                                    u64 search, u64 cluster_bytenr);
147 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size);
148
149 static void csum_block(u8 *buf, size_t len)
150 {
151         char result[BTRFS_CRC32_SIZE];
152         u32 crc = ~(u32)0;
153         crc = crc32c(crc, buf + BTRFS_CSUM_SIZE, len - BTRFS_CSUM_SIZE);
154         btrfs_csum_final(crc, result);
155         memcpy(buf, result, BTRFS_CRC32_SIZE);
156 }
157
158 static int has_name(struct btrfs_key *key)
159 {
160         switch (key->type) {
161         case BTRFS_DIR_ITEM_KEY:
162         case BTRFS_DIR_INDEX_KEY:
163         case BTRFS_INODE_REF_KEY:
164         case BTRFS_INODE_EXTREF_KEY:
165                 return 1;
166         default:
167                 break;
168         }
169
170         return 0;
171 }
172
173 static char *generate_garbage(u32 name_len)
174 {
175         char *buf = malloc(name_len);
176         int i;
177
178         if (!buf)
179                 return NULL;
180
181         for (i = 0; i < name_len; i++) {
182                 char c = rand() % 94 + 33;
183
184                 if (c == '/')
185                         c++;
186                 buf[i] = c;
187         }
188
189         return buf;
190 }
191
192 static int name_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
193 {
194         struct name *entry = rb_entry(a, struct name, n);
195         struct name *ins = rb_entry(b, struct name, n);
196         u32 len;
197
198         len = min(ins->len, entry->len);
199         return memcmp(ins->val, entry->val, len);
200 }
201
202 static int chunk_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
203 {
204         struct fs_chunk *entry = rb_entry(a, struct fs_chunk, n);
205         struct fs_chunk *ins = rb_entry(b, struct fs_chunk, n);
206
207         if (fuzz && ins->logical >= entry->logical &&
208             ins->logical < entry->logical + entry->bytes)
209                 return 0;
210
211         if (ins->logical < entry->logical)
212                 return -1;
213         else if (ins->logical > entry->logical)
214                 return 1;
215         return 0;
216 }
217
218 static void tree_insert(struct rb_root *root, struct rb_node *ins,
219                         int (*cmp)(struct rb_node *a, struct rb_node *b,
220                                    int fuzz))
221 {
222         struct rb_node ** p = &root->rb_node;
223         struct rb_node * parent = NULL;
224         int dir;
225
226         while(*p) {
227                 parent = *p;
228
229                 dir = cmp(*p, ins, 0);
230                 if (dir < 0)
231                         p = &(*p)->rb_left;
232                 else if (dir > 0)
233                         p = &(*p)->rb_right;
234                 else
235                         BUG();
236         }
237
238         rb_link_node(ins, parent, p);
239         rb_insert_color(ins, root);
240 }
241
242 static struct rb_node *tree_search(struct rb_root *root,
243                                    struct rb_node *search,
244                                    int (*cmp)(struct rb_node *a,
245                                               struct rb_node *b, int fuzz),
246                                    int fuzz)
247 {
248         struct rb_node *n = root->rb_node;
249         int dir;
250
251         while (n) {
252                 dir = cmp(n, search, fuzz);
253                 if (dir < 0)
254                         n = n->rb_left;
255                 else if (dir > 0)
256                         n = n->rb_right;
257                 else
258                         return n;
259         }
260
261         return NULL;
262 }
263
264 static char *find_collision(struct metadump_struct *md, char *name,
265                             u32 name_len)
266 {
267         struct name *val;
268         struct rb_node *entry;
269         struct name tmp;
270         unsigned long checksum;
271         int found = 0;
272         int i;
273
274         tmp.val = name;
275         tmp.len = name_len;
276         entry = tree_search(&md->name_tree, &tmp.n, name_cmp, 0);
277         if (entry) {
278                 val = rb_entry(entry, struct name, n);
279                 free(name);
280                 return val->sub;
281         }
282
283         val = malloc(sizeof(struct name));
284         if (!val) {
285                 fprintf(stderr, "Couldn't sanitize name, enomem\n");
286                 return NULL;
287         }
288
289         memset(val, 0, sizeof(*val));
290
291         val->val = name;
292         val->len = name_len;
293         val->sub = malloc(name_len);
294         if (!val->sub) {
295                 fprintf(stderr, "Couldn't sanitize name, enomem\n");
296                 free(val);
297                 return NULL;
298         }
299
300         checksum = crc32c(~1, val->val, name_len);
301         memset(val->sub, ' ', name_len);
302         i = 0;
303         while (1) {
304                 if (crc32c(~1, val->sub, name_len) == checksum &&
305                     memcmp(val->sub, val->val, val->len)) {
306                         found = 1;
307                         break;
308                 }
309
310                 if (val->sub[i] == 127) {
311                         do {
312                                 i++;
313                                 if (i > name_len)
314                                         break;
315                         } while (val->sub[i] == 127);
316
317                         if (i > name_len)
318                                 break;
319                         val->sub[i]++;
320                         if (val->sub[i] == '/')
321                                 val->sub[i]++;
322                         memset(val->sub, ' ', i);
323                         i = 0;
324                         continue;
325                 } else {
326                         val->sub[i]++;
327                         if (val->sub[i] == '/')
328                                 val->sub[i]++;
329                 }
330         }
331
332         if (!found) {
333                 fprintf(stderr, "Couldn't find a collision for '%.*s', "
334                         "generating normal garbage, it won't match indexes\n",
335                         val->len, val->val);
336                 for (i = 0; i < name_len; i++) {
337                         char c = rand() % 94 + 33;
338
339                         if (c == '/')
340                                 c++;
341                         val->sub[i] = c;
342                 }
343         }
344
345         tree_insert(&md->name_tree, &val->n, name_cmp);
346         return val->sub;
347 }
348
/*
 * Overwrite every name in the dir item(s) at @slot of @eb with same-length
 * garbage so the image does not leak file names.  On allocation failure the
 * remaining entries are left untouched (best effort).
 */
static void sanitize_dir_item(struct metadump_struct *md, struct extent_buffer *eb,
                              int slot)
{
        struct btrfs_dir_item *dir_item;
        char *buf;
        char *garbage;
        unsigned long name_ptr;
        u32 total_len;
        u32 cur = 0;
        u32 this_len;
        u32 name_len;
        /*
         * Mode 1 garbage is ours to free; in mode > 1 find_collision keeps
         * ownership of the substitute inside md->name_tree.
         */
        int free_garbage = (md->sanitize_names == 1);

        dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
        total_len = btrfs_item_size_nr(eb, slot);
        while (cur < total_len) {
                /* entries are packed back to back: header + name + data */
                this_len = sizeof(*dir_item) +
                        btrfs_dir_name_len(eb, dir_item) +
                        btrfs_dir_data_len(eb, dir_item);
                name_ptr = (unsigned long)(dir_item + 1);
                name_len = btrfs_dir_name_len(eb, dir_item);

                if (md->sanitize_names > 1) {
                        /* mode 2: substitute keeps the original crc32c */
                        buf = malloc(name_len);
                        if (!buf) {
                                fprintf(stderr, "Couldn't sanitize name, "
                                        "enomem\n");
                                return;
                        }
                        read_extent_buffer(eb, buf, name_ptr, name_len);
                        garbage = find_collision(md, buf, name_len);
                } else {
                        garbage = generate_garbage(name_len);
                }
                if (!garbage) {
                        fprintf(stderr, "Couldn't sanitize name, enomem\n");
                        return;
                }
                write_extent_buffer(eb, garbage, name_ptr, name_len);
                cur += this_len;
                dir_item = (struct btrfs_dir_item *)((char *)dir_item +
                                                     this_len);
                if (free_garbage)
                        free(garbage);
        }
}
395
/*
 * Overwrite the names stored in the inode ref (ext == 0) or inode extref
 * (ext != 0) item at @slot with same-length garbage.  Mirrors
 * sanitize_dir_item; on allocation failure remaining refs stay untouched.
 */
static void sanitize_inode_ref(struct metadump_struct *md,
                               struct extent_buffer *eb, int slot, int ext)
{
        struct btrfs_inode_extref *extref;
        struct btrfs_inode_ref *ref;
        char *garbage, *buf;
        unsigned long ptr;
        unsigned long name_ptr;
        u32 item_size;
        u32 cur_offset = 0;
        int len;
        /* mode 1 garbage is ours to free; mode > 1 is owned by name_tree */
        int free_garbage = (md->sanitize_names == 1);

        item_size = btrfs_item_size_nr(eb, slot);
        ptr = btrfs_item_ptr_offset(eb, slot);
        while (cur_offset < item_size) {
                if (ext) {
                        extref = (struct btrfs_inode_extref *)(ptr +
                                                               cur_offset);
                        name_ptr = (unsigned long)(&extref->name);
                        len = btrfs_inode_extref_name_len(eb, extref);
                        cur_offset += sizeof(*extref);
                } else {
                        ref = (struct btrfs_inode_ref *)(ptr + cur_offset);
                        len = btrfs_inode_ref_name_len(eb, ref);
                        name_ptr = (unsigned long)(ref + 1);
                        cur_offset += sizeof(*ref);
                }
                /* skip past the name to reach the next packed ref */
                cur_offset += len;

                if (md->sanitize_names > 1) {
                        buf = malloc(len);
                        if (!buf) {
                                fprintf(stderr, "Couldn't sanitize name, "
                                        "enomem\n");
                                return;
                        }
                        read_extent_buffer(eb, buf, name_ptr, len);
                        garbage = find_collision(md, buf, len);
                } else {
                        garbage = generate_garbage(len);
                }

                if (!garbage) {
                        fprintf(stderr, "Couldn't sanitize name, enomem\n");
                        return;
                }
                write_extent_buffer(eb, garbage, name_ptr, len);
                if (free_garbage)
                        free(garbage);
        }
}
448
449 static void sanitize_name(struct metadump_struct *md, u8 *dst,
450                           struct extent_buffer *src, struct btrfs_key *key,
451                           int slot)
452 {
453         struct extent_buffer *eb;
454
455         eb = alloc_dummy_eb(src->start, src->len);
456         if (!eb) {
457                 fprintf(stderr, "Couldn't sanitize name, no memory\n");
458                 return;
459         }
460
461         memcpy(eb->data, dst, eb->len);
462
463         switch (key->type) {
464         case BTRFS_DIR_ITEM_KEY:
465         case BTRFS_DIR_INDEX_KEY:
466                 sanitize_dir_item(md, eb, slot);
467                 break;
468         case BTRFS_INODE_REF_KEY:
469                 sanitize_inode_ref(md, eb, slot, 0);
470                 break;
471         case BTRFS_INODE_EXTREF_KEY:
472                 sanitize_inode_ref(md, eb, slot, 1);
473                 break;
474         default:
475                 break;
476         }
477
478         memcpy(dst, eb->data, eb->len);
479         free(eb);
480 }
481
/*
 * zero inline extents and csum items
 *
 * @dst is the raw copy of leaf @src destined for the image; csum item
 * payloads and inline file-extent data are wiped, and names are sanitized
 * when md->sanitize_names is set.
 */
static void zero_items(struct metadump_struct *md, u8 *dst,
                       struct extent_buffer *src)
{
        struct btrfs_file_extent_item *fi;
        struct btrfs_item *item;
        struct btrfs_key key;
        u32 nritems = btrfs_header_nritems(src);
        size_t size;
        unsigned long ptr;
        int i, extent_type;

        for (i = 0; i < nritems; i++) {
                item = btrfs_item_nr(src, i);
                btrfs_item_key_to_cpu(src, &key, i);
                if (key.type == BTRFS_CSUM_ITEM_KEY) {
                        /* wipe the whole checksum payload */
                        size = btrfs_item_size_nr(src, i);
                        /* btrfs_leaf_data() is used as a byte offset into dst here */
                        memset(dst + btrfs_leaf_data(src) +
                               btrfs_item_offset_nr(src, i), 0, size);
                        continue;
                }

                if (md->sanitize_names && has_name(&key)) {
                        sanitize_name(md, dst, src, &key, i);
                        continue;
                }

                if (key.type != BTRFS_EXTENT_DATA_KEY)
                        continue;

                /* only inline extents embed file contents in the leaf */
                fi = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
                extent_type = btrfs_file_extent_type(src, fi);
                if (extent_type != BTRFS_FILE_EXTENT_INLINE)
                        continue;

                ptr = btrfs_file_extent_inline_start(fi);
                size = btrfs_file_extent_inline_item_len(src, item);
                memset(dst + ptr, 0, size);
        }
}
524
/*
 * copy buffer and zero useless data in the buffer
 *
 * Copies @src into @dst, blanks the unused regions (everything past the
 * header in an empty block, the gap between item array and item data in a
 * leaf, the tail after the used key pointers in a node), then recomputes
 * the block checksum.  Superblock copies are passed through unmodified.
 */
static void copy_buffer(struct metadump_struct *md, u8 *dst,
                        struct extent_buffer *src)
{
        int level;
        size_t size;
        u32 nritems;

        memcpy(dst, src->data, src->len);
        if (src->start == BTRFS_SUPER_INFO_OFFSET)
                return;

        level = btrfs_header_level(src);
        nritems = btrfs_header_nritems(src);

        if (nritems == 0) {
                size = sizeof(struct btrfs_header);
                memset(dst + size, 0, src->len - size);
        } else if (level == 0) {
                /* leaf: zero between end of item headers and start of item data */
                size = btrfs_leaf_data(src) +
                        btrfs_item_offset_nr(src, nritems - 1) -
                        btrfs_item_nr_offset(nritems);
                memset(dst + btrfs_item_nr_offset(nritems), 0, size);
                zero_items(md, dst, src);
        } else {
                /* node: zero everything after the nritems key pointers */
                size = offsetof(struct btrfs_node, ptrs) +
                        sizeof(struct btrfs_key_ptr) * nritems;
                memset(dst + size, 0, src->len - size);
        }
        csum_block(dst, src->len);
}
558
/*
 * Compression worker thread.  Pulls async_work items off md->list,
 * compresses their buffers in place (when compress_level > 0) and bumps
 * md->num_ready.  Exits when md->done is set and the queue is empty.
 */
static void *dump_worker(void *data)
{
	struct metadump_struct *md = (struct metadump_struct *)data;
	struct async_work *async;
	int ret;

	while (1) {
		pthread_mutex_lock(&md->mutex);
		while (list_empty(&md->list)) {
			if (md->done) {
				pthread_mutex_unlock(&md->mutex);
				goto out;
			}
			pthread_cond_wait(&md->cond, &md->mutex);
		}
		async = list_entry(md->list.next, struct async_work, list);
		list_del_init(&async->list);
		pthread_mutex_unlock(&md->mutex);

		if (md->compress_level > 0) {
			u8 *orig = async->buffer;

			async->bufsize = compressBound(async->size);
			async->buffer = malloc(async->bufsize);
			if (!async->buffer) {
				/*
				 * Allocation failure was previously unchecked
				 * and compress2() would have written through
				 * NULL.  Keep the original buffer so later
				 * cleanup stays valid and flag the error.
				 */
				async->buffer = orig;
				async->bufsize = async->size;
				async->error = 1;
			} else {
				ret = compress2(async->buffer,
						 (unsigned long *)&async->bufsize,
						 orig, async->size,
						 md->compress_level);

				if (ret != Z_OK)
					async->error = 1;

				free(orig);
			}
		}

		pthread_mutex_lock(&md->mutex);
		md->num_ready++;
		pthread_mutex_unlock(&md->mutex);
	}
out:
	pthread_exit(NULL);
}
601
602 static void meta_cluster_init(struct metadump_struct *md, u64 start)
603 {
604         struct meta_cluster_header *header;
605
606         md->num_items = 0;
607         md->num_ready = 0;
608         header = &md->cluster->header;
609         header->magic = cpu_to_le64(HEADER_MAGIC);
610         header->bytenr = cpu_to_le64(start);
611         header->nritems = cpu_to_le32(0);
612         header->compress = md->compress_level > 0 ?
613                            COMPRESS_ZLIB : COMPRESS_NONE;
614 }
615
/*
 * Initialize @md for dumping @root to @out and start @num_threads
 * compression workers.  Returns 0 on success or a negative errno; on
 * failure everything allocated here is torn down again.
 */
static int metadump_init(struct metadump_struct *md, struct btrfs_root *root,
                         FILE *out, int num_threads, int compress_level,
                         int sanitize_names)
{
        int i, ret = 0;

        memset(md, 0, sizeof(*md));
        pthread_cond_init(&md->cond, NULL);
        pthread_mutex_init(&md->mutex, NULL);
        INIT_LIST_HEAD(&md->list);
        INIT_LIST_HEAD(&md->ordered);
        md->root = root;
        md->out = out;
        /* (u64)-1 marks "no extent batched yet"; see add_extent() */
        md->pending_start = (u64)-1;
        md->compress_level = compress_level;
        md->cluster = calloc(1, BLOCK_SIZE);
        md->sanitize_names = sanitize_names;
        if (sanitize_names > 1)
                /* mode 2 brute-forces crc32c collisions; enable the fast path */
                crc32c_optimization_init();

        if (!md->cluster) {
                pthread_cond_destroy(&md->cond);
                pthread_mutex_destroy(&md->mutex);
                return -ENOMEM;
        }

        meta_cluster_init(md, 0);
        if (!num_threads)
                return 0;

        /* redundant after the memset above; kept for clarity */
        md->name_tree.rb_node = NULL;
        md->num_threads = num_threads;
        md->threads = calloc(num_threads, sizeof(pthread_t));
        if (!md->threads) {
                free(md->cluster);
                pthread_cond_destroy(&md->cond);
                pthread_mutex_destroy(&md->mutex);
                return -ENOMEM;
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_create(md->threads + i, NULL, dump_worker, md);
                if (ret)
                        break;
        }

        if (ret) {
                /* wake and join the workers that did start, then undo init */
                pthread_mutex_lock(&md->mutex);
                md->done = 1;
                pthread_cond_broadcast(&md->cond);
                pthread_mutex_unlock(&md->mutex);

                for (i--; i >= 0; i--)
                        pthread_join(md->threads[i], NULL);

                pthread_cond_destroy(&md->cond);
                pthread_mutex_destroy(&md->mutex);
                free(md->cluster);
                free(md->threads);
        }

        return ret;
}
679
/*
 * Tear down @md: stop and join the workers, free the name-substitution
 * tree and the cluster buffer.
 */
static void metadump_destroy(struct metadump_struct *md)
{
        int i;
        struct rb_node *n;

        /* tell the workers to drain and exit */
        pthread_mutex_lock(&md->mutex);
        md->done = 1;
        pthread_cond_broadcast(&md->cond);
        pthread_mutex_unlock(&md->mutex);

        for (i = 0; i < md->num_threads; i++)
                pthread_join(md->threads[i], NULL);

        pthread_cond_destroy(&md->cond);
        pthread_mutex_destroy(&md->mutex);

        while ((n = rb_first(&md->name_tree))) {
                struct name *name;

                name = rb_entry(n, struct name, n);
                rb_erase(n, &md->name_tree);
                free(name->val);
                free(name->sub);
                free(name);
        }
        free(md->threads);
        free(md->cluster);
}
708
709 static int write_zero(FILE *out, size_t size)
710 {
711         static char zero[BLOCK_SIZE];
712         return fwrite(zero, size, 1, out);
713 }
714
/*
 * Flush the current cluster: wait for the workers to finish, write the
 * index block followed by every buffer in order, and pad the file to a
 * BLOCK_SIZE boundary.  *next receives the image offset of the next
 * cluster.  Called with md->mutex held; returns 0 or -EIO.
 */
static int write_buffers(struct metadump_struct *md, u64 *next)
{
        struct meta_cluster_header *header = &md->cluster->header;
        struct meta_cluster_item *item;
        struct async_work *async;
        u64 bytenr = 0;
        u32 nritems = 0;
        int ret;
        int err = 0;

        if (list_empty(&md->ordered))
                goto out;

        /* wait until all buffers are compressed */
        while (md->num_items > md->num_ready) {
                struct timespec ts = {
                        .tv_sec = 0,
                        .tv_nsec = 10000000,
                };
                /* drop the lock so the workers can bump num_ready */
                pthread_mutex_unlock(&md->mutex);
                nanosleep(&ts, NULL);
                pthread_mutex_lock(&md->mutex);
        }

        /* setup and write index block */
        list_for_each_entry(async, &md->ordered, ordered) {
                item = md->cluster->items + nritems;
                item->bytenr = cpu_to_le64(async->start);
                item->size = cpu_to_le32(async->bufsize);
                nritems++;
        }
        header->nritems = cpu_to_le32(nritems);

        ret = fwrite(md->cluster, BLOCK_SIZE, 1, md->out);
        if (ret != 1) {
                fprintf(stderr, "Error writing out cluster: %d\n", errno);
                return -EIO;
        }

        /* write buffers */
        bytenr += le64_to_cpu(header->bytenr) + BLOCK_SIZE;
        while (!list_empty(&md->ordered)) {
                async = list_entry(md->ordered.next, struct async_work,
                                   ordered);
                list_del_init(&async->ordered);

                bytenr += async->bufsize;
                /* after the first failure keep looping just to free the list */
                if (!err)
                        ret = fwrite(async->buffer, async->bufsize, 1,
                                     md->out);
                if (ret != 1) {
                        err = -EIO;
                        ret = 0;
                        fprintf(stderr, "Error writing out cluster: %d\n",
                                errno);
                }

                free(async->buffer);
                free(async);
        }

        /* zero unused space in the last block */
        if (!err && bytenr & BLOCK_MASK) {
                size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);

                bytenr += size;
                ret = write_zero(md->out, size);
                if (ret != 1) {
                        fprintf(stderr, "Error zeroing out buffer: %d\n",
                                errno);
                        err = -EIO;
                }
        }
out:
        *next = bytenr;
        return err;
}
792
793 static int read_data_extent(struct metadump_struct *md,
794                             struct async_work *async)
795 {
796         struct btrfs_multi_bio *multi = NULL;
797         struct btrfs_device *device;
798         u64 bytes_left = async->size;
799         u64 logical = async->start;
800         u64 offset = 0;
801         u64 bytenr;
802         u64 read_len;
803         ssize_t done;
804         int fd;
805         int ret;
806
807         while (bytes_left) {
808                 read_len = bytes_left;
809                 ret = btrfs_map_block(&md->root->fs_info->mapping_tree, READ,
810                                       logical, &read_len, &multi, 0, NULL);
811                 if (ret) {
812                         fprintf(stderr, "Couldn't map data block %d\n", ret);
813                         return ret;
814                 }
815
816                 device = multi->stripes[0].dev;
817
818                 if (device->fd == 0) {
819                         fprintf(stderr,
820                                 "Device we need to read from is not open\n");
821                         free(multi);
822                         return -EIO;
823                 }
824                 fd = device->fd;
825                 bytenr = multi->stripes[0].physical;
826                 free(multi);
827
828                 read_len = min(read_len, bytes_left);
829                 done = pread64(fd, async->buffer+offset, read_len, bytenr);
830                 if (done < read_len) {
831                         if (done < 0)
832                                 fprintf(stderr, "Error reading extent %d\n",
833                                         errno);
834                         else
835                                 fprintf(stderr, "Short read\n");
836                         return -EIO;
837                 }
838
839                 bytes_left -= done;
840                 offset += done;
841                 logical += done;
842         }
843
844         return 0;
845 }
846
/*
 * Turn the batched extent run (md->pending_start/pending_size) into an
 * async_work item: read the data (data extents via read_data_extent,
 * metadata block by block via read_tree_block + copy_buffer), queue it for
 * compression/write-out, and flush the cluster to disk when it is full or
 * @done is set.  Returns 0 or a negative errno.
 */
static int flush_pending(struct metadump_struct *md, int done)
{
        struct async_work *async = NULL;
        struct extent_buffer *eb;
        u64 blocksize = md->root->nodesize;
        u64 start;
        u64 size;
        size_t offset;
        int ret = 0;

        if (md->pending_size) {
                async = calloc(1, sizeof(*async));
                if (!async)
                        return -ENOMEM;

                async->start = md->pending_start;
                async->size = md->pending_size;
                async->bufsize = async->size;
                async->buffer = malloc(async->bufsize);
                if (!async->buffer) {
                        free(async);
                        return -ENOMEM;
                }
                offset = 0;
                start = async->start;
                size = async->size;

                if (md->data) {
                        ret = read_data_extent(md, async);
                        if (ret) {
                                free(async->buffer);
                                free(async);
                                return ret;
                        }
                }

                /* metadata: copy (and sanitize) one tree block at a time */
                while (!md->data && size > 0) {
                        u64 this_read = min(blocksize, size);
                        eb = read_tree_block(md->root, start, this_read, 0);
                        if (!eb) {
                                free(async->buffer);
                                free(async);
                                fprintf(stderr,
                                        "Error reading metadata block\n");
                                return -EIO;
                        }
                        copy_buffer(md, async->buffer + offset, eb);
                        free_extent_buffer(eb);
                        start += this_read;
                        offset += this_read;
                        size -= this_read;
                }

                /* batch consumed; (u64)-1 means "nothing pending" */
                md->pending_start = (u64)-1;
                md->pending_size = 0;
        } else if (!done) {
                return 0;
        }

        pthread_mutex_lock(&md->mutex);
        if (async) {
                list_add_tail(&async->ordered, &md->ordered);
                md->num_items++;
                if (md->compress_level > 0) {
                        /* hand the buffer to a worker thread */
                        list_add_tail(&async->list, &md->list);
                        pthread_cond_signal(&md->cond);
                } else {
                        md->num_ready++;
                }
        }
        if (md->num_items >= ITEMS_PER_CLUSTER || done) {
                /* write_buffers stores the next cluster offset in start */
                ret = write_buffers(md, &start);
                if (ret)
                        fprintf(stderr, "Error writing buffers %d\n",
                                errno);
                else
                        meta_cluster_init(md, start);
        }
        pthread_mutex_unlock(&md->mutex);
        return ret;
}
928
929 static int add_extent(u64 start, u64 size, struct metadump_struct *md,
930                       int data)
931 {
932         int ret;
933         if (md->data != data ||
934             md->pending_size + size > MAX_PENDING_SIZE ||
935             md->pending_start + md->pending_size != start) {
936                 ret = flush_pending(md, 0);
937                 if (ret)
938                         return ret;
939                 md->pending_start = start;
940         }
941         readahead_tree_block(md->root, start, size, 0);
942         md->pending_size += size;
943         md->data = data;
944         return 0;
945 }
946
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Decide whether the extent at @bytenr is a tree block on filesystems
 * carrying v0 extent refs, where the extent item itself has no flags to
 * tell us.
 *
 * @path must point at the extent item for @bytenr; the slots after it
 * are scanned for BTRFS_EXTENT_REF_V0_KEY items belonging to the same
 * extent.  Returns 1 if a ref with an objectid below
 * BTRFS_FIRST_FREE_OBJECTID (an internal tree) is found, 0 otherwise,
 * or a negative errno from btrfs_next_leaf().
 */
static int is_tree_block(struct btrfs_root *extent_root,
                         struct btrfs_path *path, u64 bytenr)
{
        struct extent_buffer *leaf;
        struct btrfs_key key;
        u64 ref_objectid;
        int ret;

        leaf = path->nodes[0];
        while (1) {
                struct btrfs_extent_ref_v0 *ref_item;
                path->slots[0]++;
                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(extent_root, path);
                        if (ret < 0)
                                return ret;
                        /* ret > 0: no more leaves, treat as "not found". */
                        if (ret > 0)
                                break;
                        leaf = path->nodes[0];
                }
                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                /* Walked past the items for this extent. */
                if (key.objectid != bytenr)
                        break;
                if (key.type != BTRFS_EXTENT_REF_V0_KEY)
                        continue;
                ref_item = btrfs_item_ptr(leaf, path->slots[0],
                                          struct btrfs_extent_ref_v0);
                ref_objectid = btrfs_ref_objectid_v0(leaf, ref_item);
                /* Refs from low objectids come from internal trees. */
                if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID)
                        return 1;
                break;
        }
        return 0;
}
#endif
983
984 static int copy_tree_blocks(struct btrfs_root *root, struct extent_buffer *eb,
985                             struct metadump_struct *metadump, int root_tree)
986 {
987         struct extent_buffer *tmp;
988         struct btrfs_root_item *ri;
989         struct btrfs_key key;
990         u64 bytenr;
991         int level;
992         int nritems = 0;
993         int i = 0;
994         int ret;
995
996         ret = add_extent(btrfs_header_bytenr(eb), root->leafsize, metadump, 0);
997         if (ret) {
998                 fprintf(stderr, "Error adding metadata block\n");
999                 return ret;
1000         }
1001
1002         if (btrfs_header_level(eb) == 0 && !root_tree)
1003                 return 0;
1004
1005         level = btrfs_header_level(eb);
1006         nritems = btrfs_header_nritems(eb);
1007         for (i = 0; i < nritems; i++) {
1008                 if (level == 0) {
1009                         btrfs_item_key_to_cpu(eb, &key, i);
1010                         if (key.type != BTRFS_ROOT_ITEM_KEY)
1011                                 continue;
1012                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
1013                         bytenr = btrfs_disk_root_bytenr(eb, ri);
1014                         tmp = read_tree_block(root, bytenr, root->leafsize, 0);
1015                         if (!tmp) {
1016                                 fprintf(stderr,
1017                                         "Error reading log root block\n");
1018                                 return -EIO;
1019                         }
1020                         ret = copy_tree_blocks(root, tmp, metadump, 0);
1021                         free_extent_buffer(tmp);
1022                         if (ret)
1023                                 return ret;
1024                 } else {
1025                         bytenr = btrfs_node_blockptr(eb, i);
1026                         tmp = read_tree_block(root, bytenr, root->leafsize, 0);
1027                         if (!tmp) {
1028                                 fprintf(stderr, "Error reading log block\n");
1029                                 return -EIO;
1030                         }
1031                         ret = copy_tree_blocks(root, tmp, metadump, root_tree);
1032                         free_extent_buffer(tmp);
1033                         if (ret)
1034                                 return ret;
1035                 }
1036         }
1037
1038         return 0;
1039 }
1040
1041 static int copy_log_trees(struct btrfs_root *root,
1042                           struct metadump_struct *metadump,
1043                           struct btrfs_path *path)
1044 {
1045         u64 blocknr = btrfs_super_log_root(root->fs_info->super_copy);
1046
1047         if (blocknr == 0)
1048                 return 0;
1049
1050         if (!root->fs_info->log_root_tree ||
1051             !root->fs_info->log_root_tree->node) {
1052                 fprintf(stderr, "Error copying tree log, it wasn't setup\n");
1053                 return -EIO;
1054         }
1055
1056         return copy_tree_blocks(root, root->fs_info->log_root_tree->node,
1057                                 metadump, 1);
1058 }
1059
/*
 * Add the data extents backing the free space cache to the dump.
 *
 * Scans the tree root for BTRFS_EXTENT_DATA_KEY items (the tree root's
 * file extents belong to the free space cache inodes) and queues each
 * regular extent's disk range as a data extent.
 *
 * Returns 0 on success or a negative errno from the search/add paths.
 */
static int copy_space_cache(struct btrfs_root *root,
                            struct metadump_struct *metadump,
                            struct btrfs_path *path)
{
        struct extent_buffer *leaf;
        struct btrfs_file_extent_item *fi;
        struct btrfs_key key;
        u64 bytenr, num_bytes;
        int ret;

        /* The cache inodes live in the tree root, not the fs root. */
        root = root->fs_info->tree_root;

        key.objectid = 0;
        key.type = BTRFS_EXTENT_DATA_KEY;
        key.offset = 0;

        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0) {
                fprintf(stderr, "Error searching for free space inode %d\n",
                        ret);
                return ret;
        }

        while (1) {
                leaf = path->nodes[0];
                /* Exhausted the current leaf; advance to the next one. */
                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(root, path);
                        if (ret < 0) {
                                fprintf(stderr, "Error going to next leaf "
                                        "%d\n", ret);
                                return ret;
                        }
                        /* ret > 0: no more leaves, scan is done. */
                        if (ret > 0)
                                break;
                        leaf = path->nodes[0];
                }

                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                if (key.type != BTRFS_EXTENT_DATA_KEY) {
                        path->slots[0]++;
                        continue;
                }

                fi = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_file_extent_item);
                /* Only regular extents have an on-disk range to copy. */
                if (btrfs_file_extent_type(leaf, fi) !=
                    BTRFS_FILE_EXTENT_REG) {
                        path->slots[0]++;
                        continue;
                }

                bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
                num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
                /* data = 1: these blocks carry data, not tree metadata. */
                ret = add_extent(bytenr, num_bytes, metadump, 1);
                if (ret) {
                        fprintf(stderr, "Error adding space cache blocks %d\n",
                                ret);
                        btrfs_release_path(root, path);
                        return ret;
                }
                path->slots[0]++;
        }

        return 0;
}
1125
/*
 * Walk the extent tree and queue every tree block for dumping.
 *
 * Starts just past the primary super block and iterates EXTENT_ITEM /
 * METADATA_ITEM entries.  Items large enough to carry flags are added
 * when BTRFS_EXTENT_FLAG_TREE_BLOCK is set; smaller (v0) items fall
 * back to is_tree_block() when built with V0 support, otherwise they
 * are treated as corruption.
 *
 * Returns 0 on success or a negative errno.
 */
static int copy_from_extent_tree(struct metadump_struct *metadump,
                                 struct btrfs_path *path)
{
        struct btrfs_root *extent_root;
        struct extent_buffer *leaf;
        struct btrfs_extent_item *ei;
        struct btrfs_key key;
        u64 bytenr;
        u64 num_bytes;
        int ret;

        extent_root = metadump->root->fs_info->extent_root;
        /* Skip the primary super block; it is added separately. */
        bytenr = BTRFS_SUPER_INFO_OFFSET + 4096;
        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = 0;

        ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
        if (ret < 0) {
                fprintf(stderr, "Error searching extent root %d\n", ret);
                return ret;
        }
        ret = 0;

        while (1) {
                leaf = path->nodes[0];
                /* Exhausted the current leaf; advance to the next one. */
                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(extent_root, path);
                        if (ret < 0) {
                                fprintf(stderr, "Error going to next leaf %d"
                                        "\n", ret);
                                break;
                        }
                        if (ret > 0) {
                                /* End of the extent tree: normal exit. */
                                ret = 0;
                                break;
                        }
                        leaf = path->nodes[0];
                }

                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                /* Skip back refs and anything before our cursor. */
                if (key.objectid < bytenr ||
                    (key.type != BTRFS_EXTENT_ITEM_KEY &&
                     key.type != BTRFS_METADATA_ITEM_KEY)) {
                        path->slots[0]++;
                        continue;
                }

                bytenr = key.objectid;
                /* METADATA_ITEM offset is the level, not the size. */
                if (key.type == BTRFS_METADATA_ITEM_KEY)
                        num_bytes = extent_root->leafsize;
                else
                        num_bytes = key.offset;

                if (btrfs_item_size_nr(leaf, path->slots[0]) > sizeof(*ei)) {
                        /* Modern item: flags tell us if it's a tree block. */
                        ei = btrfs_item_ptr(leaf, path->slots[0],
                                            struct btrfs_extent_item);
                        if (btrfs_extent_flags(leaf, ei) &
                            BTRFS_EXTENT_FLAG_TREE_BLOCK) {
                                ret = add_extent(bytenr, num_bytes, metadump,
                                                 0);
                                if (ret) {
                                        fprintf(stderr, "Error adding block "
                                                "%d\n", ret);
                                        break;
                                }
                        }
                } else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
                        /* v0 item: inspect its refs to classify it. */
                        ret = is_tree_block(extent_root, path, bytenr);
                        if (ret < 0) {
                                fprintf(stderr, "Error checking tree block "
                                        "%d\n", ret);
                                break;
                        }

                        if (ret) {
                                ret = add_extent(bytenr, num_bytes, metadump,
                                                 0);
                                if (ret) {
                                        fprintf(stderr, "Error adding block "
                                                "%d\n", ret);
                                        break;
                                }
                        }
                        ret = 0;
#else
                        fprintf(stderr, "Either extent tree corruption or "
                                "you haven't built with V0 support\n");
                        ret = -EIO;
                        break;
#endif
                }
                /* Advance the cursor past this extent. */
                bytenr += num_bytes;
        }

        btrfs_release_path(extent_root, path);

        return ret;
}
1226
/*
 * Build a metadump image of the filesystem on @input and write it to @out.
 *
 * Either walks the chunk/root trees directly (@walk_trees) or harvests
 * tree blocks from the extent tree, then adds the tree log and the free
 * space cache.  @num_threads/@compress_level/@sanitize are forwarded to
 * metadump_init().
 *
 * Returns 0 on success; otherwise the first error encountered (errors
 * from the final flush/close only surface when nothing failed earlier).
 */
static int create_metadump(const char *input, FILE *out, int num_threads,
                           int compress_level, int sanitize, int walk_trees)
{
        struct btrfs_root *root;
        struct btrfs_path *path = NULL;
        struct metadump_struct metadump;
        int ret;
        int err = 0;

        root = open_ctree(input, 0, 0);
        if (!root) {
                fprintf(stderr, "Open ctree failed\n");
                return -EIO;
        }

        /* The dump format assumes a single metadata block size. */
        BUG_ON(root->nodesize != root->leafsize);

        ret = metadump_init(&metadump, root, out, num_threads,
                            compress_level, sanitize);
        if (ret) {
                fprintf(stderr, "Error initing metadump %d\n", ret);
                close_ctree(root);
                return ret;
        }

        /* The primary super block is always the first extent dumped. */
        ret = add_extent(BTRFS_SUPER_INFO_OFFSET, 4096, &metadump, 0);
        if (ret) {
                fprintf(stderr, "Error adding metadata %d\n", ret);
                err = ret;
                goto out;
        }

        path = btrfs_alloc_path();
        if (!path) {
                fprintf(stderr, "Out of memory allocing path\n");
                err = -ENOMEM;
                goto out;
        }

        if (walk_trees) {
                /* Walk the trees themselves, starting from both roots. */
                ret = copy_tree_blocks(root, root->fs_info->chunk_root->node,
                                       &metadump, 1);
                if (ret) {
                        err = ret;
                        goto out;
                }

                ret = copy_tree_blocks(root, root->fs_info->tree_root->node,
                                       &metadump, 1);
                if (ret) {
                        err = ret;
                        goto out;
                }
        } else {
                /* Faster path: read tree block locations off the extent tree. */
                ret = copy_from_extent_tree(&metadump, path);
                if (ret) {
                        err = ret;
                        goto out;
                }
        }

        ret = copy_log_trees(root, &metadump, path);
        if (ret) {
                err = ret;
                goto out;
        }

        ret = copy_space_cache(root, &metadump, path);
out:
        /* Always flush, even on error, so worker threads can finish. */
        ret = flush_pending(&metadump, 1);
        if (ret) {
                if (!err)
                        err = ret;
                fprintf(stderr, "Error flushing pending %d\n", ret);
        }

        metadump_destroy(&metadump);

        btrfs_free_path(path);
        ret = close_ctree(root);
        /* Earlier failures take precedence over the close result. */
        return err ? err : ret;
}
1309
/*
 * Old-style restore: patch the super block in @buffer for a single-device
 * image.
 *
 * Sets the METADUMP flag and replaces the whole sys_chunk_array with one
 * SYSTEM chunk of unlimited length at offset 0, i.e. an identity mapping
 * of logical to physical addresses on this device, then re-checksums the
 * block.
 */
static void update_super_old(u8 *buffer)
{
        struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
        struct btrfs_chunk *chunk;
        struct btrfs_disk_key *key;
        u32 sectorsize = btrfs_super_sectorsize(super);
        u64 flags = btrfs_super_flags(super);

        flags |= BTRFS_SUPER_FLAG_METADUMP;
        btrfs_set_super_flags(super, flags);

        /* The array holds a disk_key immediately followed by its chunk. */
        key = (struct btrfs_disk_key *)(super->sys_chunk_array);
        chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
                                       sizeof(struct btrfs_disk_key));

        btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
        btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
        btrfs_set_disk_key_offset(key, 0);

        /* One chunk covering everything from logical address 0. */
        btrfs_set_stack_chunk_length(chunk, (u64)-1);
        btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
        btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024);
        btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
        btrfs_set_stack_chunk_io_align(chunk, sectorsize);
        btrfs_set_stack_chunk_io_width(chunk, sectorsize);
        btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
        btrfs_set_stack_chunk_num_stripes(chunk, 1);
        btrfs_set_stack_chunk_sub_stripes(chunk, 0);
        /* Point the single stripe at the device from the super block. */
        chunk->stripe.devid = super->dev_item.devid;
        chunk->stripe.offset = cpu_to_le64(0);
        memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
        btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
        csum_block(buffer, 4096);
}
1344
/*
 * Rewrite the super block's sys_chunk_array for a single-device restore.
 *
 * Each (disk_key, chunk) pair is compacted in place: the chunk is forced
 * to one stripe on this device with type SYSTEM, which shrinks any
 * multi-stripe chunk.  @write_ptr trails @ptr so the rewritten entries
 * pack tightly at the front of the array.
 *
 * Returns 0 on success or -EIO on an unexpected key type.
 */
static int update_super(u8 *buffer)
{
        struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
        struct btrfs_chunk *chunk;
        struct btrfs_disk_key *disk_key;
        struct btrfs_key key;
        u32 new_array_size = 0;
        u32 array_size;
        u32 cur = 0;
        u32 new_cur = 0;
        u8 *ptr, *write_ptr;
        int old_num_stripes;

        /* ptr reads the old entries, write_ptr writes the new ones. */
        write_ptr = ptr = super->sys_chunk_array;
        array_size = btrfs_super_sys_array_size(super);

        while (cur < array_size) {
                disk_key = (struct btrfs_disk_key *)ptr;
                btrfs_disk_key_to_cpu(&key, disk_key);

                /* Keys are copied through unchanged. */
                new_array_size += sizeof(*disk_key);
                memmove(write_ptr, ptr, sizeof(*disk_key));

                write_ptr += sizeof(*disk_key);
                ptr += sizeof(*disk_key);
                cur += sizeof(*disk_key);
                new_cur += sizeof(*disk_key);

                if (key.type == BTRFS_CHUNK_ITEM_KEY) {
                        /* Read the stripe count before the move clobbers
                         * the source when regions overlap. */
                        chunk = (struct btrfs_chunk *)ptr;
                        old_num_stripes = btrfs_stack_chunk_num_stripes(chunk);
                        chunk = (struct btrfs_chunk *)write_ptr;

                        /* Copy only the fixed header + first stripe. */
                        memmove(write_ptr, ptr, sizeof(*chunk));
                        btrfs_set_stack_chunk_num_stripes(chunk, 1);
                        btrfs_set_stack_chunk_sub_stripes(chunk, 0);
                        btrfs_set_stack_chunk_type(chunk,
                                                   BTRFS_BLOCK_GROUP_SYSTEM);
                        chunk->stripe.devid = super->dev_item.devid;
                        memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid,
                               BTRFS_UUID_SIZE);
                        new_array_size += sizeof(*chunk);
                        new_cur += sizeof(*chunk);
                } else {
                        fprintf(stderr, "Bogus key in the sys chunk array "
                                "%d\n", key.type);
                        return -EIO;
                }
                /* Skip the old entry's full (multi-stripe) size. */
                write_ptr += sizeof(*chunk);
                ptr += btrfs_chunk_item_size(old_num_stripes);
                cur += btrfs_chunk_item_size(old_num_stripes);
        }

        btrfs_set_super_sys_array_size(super, new_array_size);
        csum_block(buffer, 4096);

        return 0;
}
1403
1404 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size)
1405 {
1406         struct extent_buffer *eb;
1407
1408         eb = malloc(sizeof(struct extent_buffer) + size);
1409         if (!eb)
1410                 return NULL;
1411         memset(eb, 0, sizeof(struct extent_buffer) + size);
1412
1413         eb->start = bytenr;
1414         eb->len = size;
1415         return eb;
1416 }
1417
/*
 * Shrink the item at @slot of leaf @eb to @new_size bytes.
 *
 * Item data grows from the end of the leaf toward the header, so items
 * at or after @slot have their data at lower offsets.  Shrinking moves
 * that data region up by the size difference and bumps the affected
 * item offsets accordingly, then records the new item size.  No-op when
 * the size is unchanged.  Only shrinking is handled here (size_diff is
 * computed as old - new).
 */
static void truncate_item(struct extent_buffer *eb, int slot, u32 new_size)
{
        struct btrfs_item *item;
        u32 nritems;
        u32 old_size;
        u32 old_data_start;
        u32 size_diff;
        u32 data_end;
        int i;

        old_size = btrfs_item_size_nr(eb, slot);
        if (old_size == new_size)
                return;

        nritems = btrfs_header_nritems(eb);
        /* The last item's offset is the low end of the data region. */
        data_end = btrfs_item_offset_nr(eb, nritems - 1);

        old_data_start = btrfs_item_offset_nr(eb, slot);
        size_diff = old_size - new_size;

        /* Items from @slot onward shift up by the freed space. */
        for (i = slot; i < nritems; i++) {
                u32 ioff;
                item = btrfs_item_nr(eb, i);
                ioff = btrfs_item_offset(eb, item);
                btrfs_set_item_offset(eb, item, ioff + size_diff);
        }

        /* Move the data of items @slot.. up toward the truncated item. */
        memmove_extent_buffer(eb, btrfs_leaf_data(eb) + data_end + size_diff,
                              btrfs_leaf_data(eb) + data_end,
                              old_data_start + new_size - data_end);
        item = btrfs_item_nr(eb, slot);
        btrfs_set_item_size(eb, item, new_size);
}
1451
1452 static int fixup_chunk_tree_block(struct mdrestore_struct *mdres,
1453                                   struct async_work *async, u8 *buffer,
1454                                   size_t size)
1455 {
1456         struct extent_buffer *eb;
1457         size_t size_left = size;
1458         u64 bytenr = async->start;
1459         int i;
1460
1461         if (size_left % mdres->leafsize)
1462                 return 0;
1463
1464         eb = alloc_dummy_eb(bytenr, mdres->leafsize);
1465         if (!eb)
1466                 return -ENOMEM;
1467
1468         while (size_left) {
1469                 eb->start = bytenr;
1470                 memcpy(eb->data, buffer, mdres->leafsize);
1471
1472                 if (btrfs_header_bytenr(eb) != bytenr)
1473                         break;
1474                 if (memcmp(mdres->fsid,
1475                            eb->data + offsetof(struct btrfs_header, fsid),
1476                            BTRFS_FSID_SIZE))
1477                         break;
1478
1479                 if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID)
1480                         goto next;
1481
1482                 if (btrfs_header_level(eb) != 0)
1483                         goto next;
1484
1485                 for (i = 0; i < btrfs_header_nritems(eb); i++) {
1486                         struct btrfs_chunk chunk;
1487                         struct btrfs_key key;
1488                         u64 type;
1489
1490                         btrfs_item_key_to_cpu(eb, &key, i);
1491                         if (key.type != BTRFS_CHUNK_ITEM_KEY)
1492                                 continue;
1493                         truncate_item(eb, i, sizeof(chunk));
1494                         read_extent_buffer(eb, &chunk,
1495                                            btrfs_item_ptr_offset(eb, i),
1496                                            sizeof(chunk));
1497
1498                         /* Zero out the RAID profile */
1499                         type = btrfs_stack_chunk_type(&chunk);
1500                         type &= (BTRFS_BLOCK_GROUP_DATA |
1501                                  BTRFS_BLOCK_GROUP_SYSTEM |
1502                                  BTRFS_BLOCK_GROUP_METADATA);
1503                         btrfs_set_stack_chunk_type(&chunk, type);
1504
1505                         btrfs_set_stack_chunk_num_stripes(&chunk, 1);
1506                         btrfs_set_stack_chunk_sub_stripes(&chunk, 0);
1507                         btrfs_set_stack_stripe_devid(&chunk.stripe, mdres->devid);
1508                         memcpy(chunk.stripe.dev_uuid, mdres->uuid,
1509                                BTRFS_UUID_SIZE);
1510                         write_extent_buffer(eb, &chunk,
1511                                             btrfs_item_ptr_offset(eb, i),
1512                                             sizeof(chunk));
1513                 }
1514                 memcpy(buffer, eb->data, eb->len);
1515                 csum_block(buffer, eb->len);
1516 next:
1517                 size_left -= mdres->leafsize;
1518                 buffer += mdres->leafsize;
1519                 bytenr += mdres->leafsize;
1520         }
1521
1522         return 0;
1523 }
1524
1525 static void write_backup_supers(int fd, u8 *buf)
1526 {
1527         struct btrfs_super_block *super = (struct btrfs_super_block *)buf;
1528         struct stat st;
1529         u64 size;
1530         u64 bytenr;
1531         int i;
1532         int ret;
1533
1534         if (fstat(fd, &st)) {
1535                 fprintf(stderr, "Couldn't stat restore point, won't be able "
1536                         "to write backup supers: %d\n", errno);
1537                 return;
1538         }
1539
1540         size = btrfs_device_size(fd, &st);
1541
1542         for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1543                 bytenr = btrfs_sb_offset(i);
1544                 if (bytenr + 4096 > size)
1545                         break;
1546                 btrfs_set_super_bytenr(super, bytenr);
1547                 csum_block(buf, 4096);
1548                 ret = pwrite64(fd, buf, 4096, bytenr);
1549                 if (ret < 4096) {
1550                         if (ret < 0)
1551                                 fprintf(stderr, "Problem writing out backup "
1552                                         "super block %d, err %d\n", i, errno);
1553                         else
1554                                 fprintf(stderr, "Short write writing out "
1555                                         "backup super block\n");
1556                         break;
1557                 }
1558         }
1559 }
1560
1561 static u64 logical_to_physical(struct mdrestore_struct *mdres, u64 logical, u64 *size)
1562 {
1563         struct fs_chunk *fs_chunk;
1564         struct rb_node *entry;
1565         struct fs_chunk search;
1566         u64 offset;
1567
1568         if (logical == BTRFS_SUPER_INFO_OFFSET)
1569                 return logical;
1570
1571         search.logical = logical;
1572         entry = tree_search(&mdres->chunk_tree, &search.n, chunk_cmp, 1);
1573         if (!entry) {
1574                 if (mdres->in != stdin)
1575                         printf("Couldn't find a chunk, using logical\n");
1576                 return logical;
1577         }
1578         fs_chunk = rb_entry(entry, struct fs_chunk, n);
1579         if (fs_chunk->logical > logical || fs_chunk->logical + fs_chunk->bytes < logical)
1580                 BUG();
1581         offset = search.logical - fs_chunk->logical;
1582
1583         *size = min(*size, fs_chunk->bytes + fs_chunk->logical - logical);
1584         return fs_chunk->physical + offset;
1585 }
1586
/*
 * Restore worker thread: consume async_work items from mdres->list,
 * decompress them if needed, fix up supers/chunk blocks, and write the
 * result to the target.
 *
 * Runs until mdres->done is set with the list empty.  Errors are
 * recorded in mdres->error (first error wins) rather than aborting the
 * whole restore, except for write_data_to_disk failures which exit.
 */
static void *restore_worker(void *data)
{
        struct mdrestore_struct *mdres = (struct mdrestore_struct *)data;
        struct async_work *async;
        size_t size;
        u8 *buffer;
        u8 *outbuf;
        int outfd;
        int ret;
        /* Worst-case decompressed size for one cluster item. */
        int compress_size = MAX_PENDING_SIZE * 4;

        outfd = fileno(mdres->out);
        buffer = malloc(compress_size);
        if (!buffer) {
                fprintf(stderr, "Error allocing buffer\n");
                pthread_mutex_lock(&mdres->mutex);
                if (!mdres->error)
                        mdres->error = -ENOMEM;
                pthread_mutex_unlock(&mdres->mutex);
                goto out;
        }

        while (1) {
                u64 bytenr;
                off_t offset = 0;
                int err = 0;

                /* Wait for work; leafsize must be known before any block
                 * can be processed. */
                pthread_mutex_lock(&mdres->mutex);
                while (!mdres->leafsize || list_empty(&mdres->list)) {
                        if (mdres->done) {
                                pthread_mutex_unlock(&mdres->mutex);
                                goto out;
                        }
                        pthread_cond_wait(&mdres->cond, &mdres->mutex);
                }
                async = list_entry(mdres->list.next, struct async_work, list);
                list_del_init(&async->list);
                pthread_mutex_unlock(&mdres->mutex);

                if (mdres->compress_method == COMPRESS_ZLIB) {
                        /* uncompress() updates size to the actual output
                         * length on success. */
                        size = compress_size;
                        ret = uncompress(buffer, (unsigned long *)&size,
                                         async->buffer, async->bufsize);
                        if (ret != Z_OK) {
                                fprintf(stderr, "Error decompressing %d\n",
                                        ret);
                                err = -EIO;
                        }
                        outbuf = buffer;
                } else {
                        outbuf = async->buffer;
                        size = async->bufsize;
                }

                if (!mdres->multi_devices) {
                        if (async->start == BTRFS_SUPER_INFO_OFFSET) {
                                /* Rewrite the super for the restore target. */
                                if (mdres->old_restore) {
                                        update_super_old(outbuf);
                                } else {
                                        ret = update_super(outbuf);
                                        if (ret)
                                                err = ret;
                                }
                        } else if (!mdres->old_restore) {
                                ret = fixup_chunk_tree_block(mdres, async, outbuf, size);
                                if (ret)
                                        err = ret;
                        }
                }

                if (!mdres->fixup_offset) {
                        /* Write in chunk-sized pieces, remapping each piece
                         * through the chunk tree unless multi-device. */
                        while (size) {
                                u64 chunk_size = size;
                                if (!mdres->multi_devices)
                                        bytenr = logical_to_physical(mdres,
                                                                     async->start + offset,
                                                                     &chunk_size);
                                else
                                        bytenr = async->start + offset;

                                ret = pwrite64(outfd, outbuf+offset, chunk_size,
                                               bytenr);
                                if (ret != chunk_size) {
                                        if (ret < 0) {
                                                fprintf(stderr, "Error writing to "
                                                        "device %d\n", errno);
                                                /* NOTE(review): stores errno as a
                                                 * positive value while other paths
                                                 * use negative codes (-EIO) —
                                                 * confirm intended convention. */
                                                err = errno;
                                                break;
                                        } else {
                                                fprintf(stderr, "Short write\n");
                                                err = -EIO;
                                                break;
                                        }
                                }
                                size -= chunk_size;
                                offset += chunk_size;
                        }
                } else if (async->start != BTRFS_SUPER_INFO_OFFSET) {
                        /* fixup_offset mode routes writes through the fs info. */
                        ret = write_data_to_disk(mdres->info, outbuf, async->start, size, 0);
                        if (ret) {
                                printk("Error write data\n");
                                exit(1);
                        }
                }


                /* backup super blocks are already there at fixup_offset stage */
                if (!mdres->multi_devices && async->start == BTRFS_SUPER_INFO_OFFSET)
                        write_backup_supers(outfd, outbuf);

                pthread_mutex_lock(&mdres->mutex);
                /* Record only the first error seen across workers. */
                if (err && !mdres->error)
                        mdres->error = err;
                mdres->num_items--;
                pthread_mutex_unlock(&mdres->mutex);

                free(async->buffer);
                free(async);
        }
out:
        free(buffer);
        pthread_exit(NULL);
}
1710
1711 static void mdrestore_destroy(struct mdrestore_struct *mdres)
1712 {
1713         struct rb_node *n;
1714         int i;
1715
1716         while ((n = rb_first(&mdres->chunk_tree))) {
1717                 struct fs_chunk *entry;
1718
1719                 entry = rb_entry(n, struct fs_chunk, n);
1720                 rb_erase(n, &mdres->chunk_tree);
1721                 free(entry);
1722         }
1723         pthread_mutex_lock(&mdres->mutex);
1724         mdres->done = 1;
1725         pthread_cond_broadcast(&mdres->cond);
1726         pthread_mutex_unlock(&mdres->mutex);
1727
1728         for (i = 0; i < mdres->num_threads; i++)
1729                 pthread_join(mdres->threads[i], NULL);
1730
1731         pthread_cond_destroy(&mdres->cond);
1732         pthread_mutex_destroy(&mdres->mutex);
1733         free(mdres->threads);
1734 }
1735
1736 static int mdrestore_init(struct mdrestore_struct *mdres,
1737                           FILE *in, FILE *out, int old_restore,
1738                           int num_threads, int fixup_offset,
1739                           struct btrfs_fs_info *info, int multi_devices)
1740 {
1741         int i, ret = 0;
1742
1743         memset(mdres, 0, sizeof(*mdres));
1744         pthread_cond_init(&mdres->cond, NULL);
1745         pthread_mutex_init(&mdres->mutex, NULL);
1746         INIT_LIST_HEAD(&mdres->list);
1747         mdres->in = in;
1748         mdres->out = out;
1749         mdres->old_restore = old_restore;
1750         mdres->chunk_tree.rb_node = NULL;
1751         mdres->fixup_offset = fixup_offset;
1752         mdres->info = info;
1753         mdres->multi_devices = multi_devices;
1754
1755         if (!num_threads)
1756                 return 0;
1757
1758         mdres->num_threads = num_threads;
1759         mdres->threads = calloc(num_threads, sizeof(pthread_t));
1760         if (!mdres->threads)
1761                 return -ENOMEM;
1762         for (i = 0; i < num_threads; i++) {
1763                 ret = pthread_create(mdres->threads + i, NULL, restore_worker,
1764                                      mdres);
1765                 if (ret)
1766                         break;
1767         }
1768         if (ret)
1769                 mdrestore_destroy(mdres);
1770         return ret;
1771 }
1772
1773 static int fill_mdres_info(struct mdrestore_struct *mdres,
1774                            struct async_work *async)
1775 {
1776         struct btrfs_super_block *super;
1777         u8 *buffer = NULL;
1778         u8 *outbuf;
1779         int ret;
1780
1781         /* We've already been initialized */
1782         if (mdres->leafsize)
1783                 return 0;
1784
1785         if (mdres->compress_method == COMPRESS_ZLIB) {
1786                 size_t size = MAX_PENDING_SIZE * 2;
1787
1788                 buffer = malloc(MAX_PENDING_SIZE * 2);
1789                 if (!buffer)
1790                         return -ENOMEM;
1791                 ret = uncompress(buffer, (unsigned long *)&size,
1792                                  async->buffer, async->bufsize);
1793                 if (ret != Z_OK) {
1794                         fprintf(stderr, "Error decompressing %d\n", ret);
1795                         free(buffer);
1796                         return -EIO;
1797                 }
1798                 outbuf = buffer;
1799         } else {
1800                 outbuf = async->buffer;
1801         }
1802
1803         super = (struct btrfs_super_block *)outbuf;
1804         mdres->leafsize = btrfs_super_leafsize(super);
1805         memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
1806         memcpy(mdres->uuid, super->dev_item.uuid,
1807                        BTRFS_UUID_SIZE);
1808         mdres->devid = le64_to_cpu(super->dev_item.devid);
1809         free(buffer);
1810         return 0;
1811 }
1812
/*
 * Queue every item of one metadump cluster for the restore workers.
 *
 * Reads each item's payload from mdres->in, wraps it in an async_work
 * and appends it to mdres->list, signalling one worker per item.  The
 * super block item additionally primes mdres via fill_mdres_info().
 * On success *next is set to the file offset of the following cluster
 * (clusters are BLOCK_SIZE aligned, so the tail padding is consumed).
 *
 * Returns 0 on success, -ENOMEM/-EIO on allocation or read failure.
 */
static int add_cluster(struct meta_cluster *cluster,
                       struct mdrestore_struct *mdres, u64 *next)
{
        struct meta_cluster_item *item;
        struct meta_cluster_header *header = &cluster->header;
        struct async_work *async;
        u64 bytenr;
        u32 i, nritems;
        int ret;

        /* The previous cluster must be fully drained before adding more. */
        BUG_ON(mdres->num_items);
        mdres->compress_method = header->compress;

        /* Item payloads start right after the BLOCK_SIZE cluster header. */
        bytenr = le64_to_cpu(header->bytenr) + BLOCK_SIZE;
        nritems = le32_to_cpu(header->nritems);
        for (i = 0; i < nritems; i++) {
                item = &cluster->items[i];
                async = calloc(1, sizeof(*async));
                if (!async) {
                        fprintf(stderr, "Error allocating async\n");
                        return -ENOMEM;
                }
                async->start = le64_to_cpu(item->bytenr);
                async->bufsize = le32_to_cpu(item->size);
                async->buffer = malloc(async->bufsize);
                if (!async->buffer) {
                        fprintf(stderr, "Error allocing async buffer\n");
                        free(async);
                        return -ENOMEM;
                }
                ret = fread(async->buffer, async->bufsize, 1, mdres->in);
                if (ret != 1) {
                        fprintf(stderr, "Error reading buffer %d\n", errno);
                        free(async->buffer);
                        free(async);
                        return -EIO;
                }
                bytenr += async->bufsize;

                pthread_mutex_lock(&mdres->mutex);
                /* The super block item also initializes the context. */
                if (async->start == BTRFS_SUPER_INFO_OFFSET) {
                        ret = fill_mdres_info(mdres, async);
                        if (ret) {
                                fprintf(stderr, "Error setting up restore\n");
                                pthread_mutex_unlock(&mdres->mutex);
                                free(async->buffer);
                                free(async);
                                return ret;
                        }
                }
                list_add_tail(&async->list, &mdres->list);
                mdres->num_items++;
                pthread_cond_signal(&mdres->cond);
                pthread_mutex_unlock(&mdres->mutex);
        }
        /*
         * Consume the zero padding up to the next BLOCK_SIZE boundary;
         * the padding is at most BLOCK_MASK bytes, which is exactly what
         * the scratch buffer holds.
         */
        if (bytenr & BLOCK_MASK) {
                char buffer[BLOCK_MASK];
                size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);

                bytenr += size;
                ret = fread(buffer, size, 1, mdres->in);
                if (ret != 1) {
                        fprintf(stderr, "Error reading in buffer %d\n", errno);
                        return -EIO;
                }
        }
        *next = bytenr;
        return 0;
}
1882
1883 static int wait_for_worker(struct mdrestore_struct *mdres)
1884 {
1885         int ret = 0;
1886
1887         pthread_mutex_lock(&mdres->mutex);
1888         ret = mdres->error;
1889         while (!ret && mdres->num_items > 0) {
1890                 struct timespec ts = {
1891                         .tv_sec = 0,
1892                         .tv_nsec = 10000000,
1893                 };
1894                 pthread_mutex_unlock(&mdres->mutex);
1895                 nanosleep(&ts, NULL);
1896                 pthread_mutex_lock(&mdres->mutex);
1897                 ret = mdres->error;
1898         }
1899         pthread_mutex_unlock(&mdres->mutex);
1900         return ret;
1901 }
1902
1903 static int read_chunk_block(struct mdrestore_struct *mdres, u8 *buffer,
1904                             u64 bytenr, u64 item_bytenr, u32 bufsize,
1905                             u64 cluster_bytenr)
1906 {
1907         struct extent_buffer *eb;
1908         int ret = 0;
1909         int i;
1910
1911         eb = alloc_dummy_eb(bytenr, mdres->leafsize);
1912         if (!eb) {
1913                 ret = -ENOMEM;
1914                 goto out;
1915         }
1916
1917         while (item_bytenr != bytenr) {
1918                 buffer += mdres->leafsize;
1919                 item_bytenr += mdres->leafsize;
1920         }
1921
1922         memcpy(eb->data, buffer, mdres->leafsize);
1923         if (btrfs_header_bytenr(eb) != bytenr) {
1924                 fprintf(stderr, "Eb bytenr doesn't match found bytenr\n");
1925                 ret = -EIO;
1926                 goto out;
1927         }
1928
1929         if (memcmp(mdres->fsid, eb->data + offsetof(struct btrfs_header, fsid),
1930                    BTRFS_FSID_SIZE)) {
1931                 fprintf(stderr, "Fsid doesn't match\n");
1932                 ret = -EIO;
1933                 goto out;
1934         }
1935
1936         if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID) {
1937                 fprintf(stderr, "Does not belong to the chunk tree\n");
1938                 ret = -EIO;
1939                 goto out;
1940         }
1941
1942         for (i = 0; i < btrfs_header_nritems(eb); i++) {
1943                 struct btrfs_chunk chunk;
1944                 struct fs_chunk *fs_chunk;
1945                 struct btrfs_key key;
1946
1947                 if (btrfs_header_level(eb)) {
1948                         u64 blockptr = btrfs_node_blockptr(eb, i);
1949
1950                         ret = search_for_chunk_blocks(mdres, blockptr,
1951                                                       cluster_bytenr);
1952                         if (ret)
1953                                 break;
1954                         continue;
1955                 }
1956
1957                 /* Yay a leaf!  We loves leafs! */
1958                 btrfs_item_key_to_cpu(eb, &key, i);
1959                 if (key.type != BTRFS_CHUNK_ITEM_KEY)
1960                         continue;
1961
1962                 fs_chunk = malloc(sizeof(struct fs_chunk));
1963                 if (!fs_chunk) {
1964                         fprintf(stderr, "Erorr allocating chunk\n");
1965                         ret = -ENOMEM;
1966                         break;
1967                 }
1968                 memset(fs_chunk, 0, sizeof(*fs_chunk));
1969                 read_extent_buffer(eb, &chunk, btrfs_item_ptr_offset(eb, i),
1970                                    sizeof(chunk));
1971
1972                 fs_chunk->logical = key.offset;
1973                 fs_chunk->physical = btrfs_stack_stripe_offset(&chunk.stripe);
1974                 fs_chunk->bytes = btrfs_stack_chunk_length(&chunk);
1975                 tree_insert(&mdres->chunk_tree, &fs_chunk->n, chunk_cmp);
1976         }
1977 out:
1978         free(eb);
1979         return ret;
1980 }
1981
/*
 * If you have to ask you aren't worthy.
 *
 * Locate the tree block at logical address 'search' inside the metadump
 * image and record its chunk items into mdres->chunk_tree (via
 * read_chunk_block(), which recurses back here for interior nodes).
 * Scanning starts with the cluster at file offset 'cluster_bytenr' and
 * wraps around to the start of the image once EOF is reached.
 *
 * Returns 0 on success, negative on error.
 */
static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
                                   u64 search, u64 cluster_bytenr)
{
        struct meta_cluster *cluster;
        struct meta_cluster_header *header;
        struct meta_cluster_item *item;
        u64 current_cluster = cluster_bytenr, bytenr;
        u64 item_bytenr;
        u32 bufsize, nritems, i;
        u8 *buffer, *tmp = NULL;
        int ret = 0;

        cluster = malloc(BLOCK_SIZE);
        if (!cluster) {
                fprintf(stderr, "Error allocating cluster\n");
                return -ENOMEM;
        }

        buffer = malloc(MAX_PENDING_SIZE * 2);
        if (!buffer) {
                fprintf(stderr, "Error allocing buffer\n");
                free(cluster);
                return -ENOMEM;
        }

        /* Compressed items need a scratch buffer for the raw file data. */
        if (mdres->compress_method == COMPRESS_ZLIB) {
                tmp = malloc(MAX_PENDING_SIZE * 2);
                if (!tmp) {
                        fprintf(stderr, "Error allocing tmp buffer\n");
                        free(cluster);
                        free(buffer);
                        return -ENOMEM;
                }
        }

        bytenr = current_cluster;
        while (1) {
                if (fseek(mdres->in, current_cluster, SEEK_SET)) {
                        fprintf(stderr, "Error seeking: %d\n", errno);
                        ret = -EIO;
                        break;
                }

                ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
                if (ret == 0) {
                        /* Hit EOF: retry once from the image start. */
                        if (cluster_bytenr != 0) {
                                cluster_bytenr = 0;
                                current_cluster = 0;
                                bytenr = 0;
                                continue;
                        }
                        printf("ok this is where we screwed up?\n");
                        ret = -EIO;
                        break;
                } else if (ret < 0) {
                        fprintf(stderr, "Error reading image\n");
                        break;
                }
                ret = 0;

                header = &cluster->header;
                if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
                    le64_to_cpu(header->bytenr) != current_cluster) {
                        fprintf(stderr, "bad header in metadump image\n");
                        ret = -EIO;
                        break;
                }

                /* Item payloads follow the BLOCK_SIZE cluster header. */
                bytenr += BLOCK_SIZE;
                nritems = le32_to_cpu(header->nritems);
                for (i = 0; i < nritems; i++) {
                        size_t size;

                        item = &cluster->items[i];
                        bufsize = le32_to_cpu(item->size);
                        item_bytenr = le64_to_cpu(item->bytenr);

                        if (mdres->compress_method == COMPRESS_ZLIB) {
                                ret = fread(tmp, bufsize, 1, mdres->in);
                                if (ret != 1) {
                                        fprintf(stderr, "Error reading: %d\n",
                                                errno);
                                        ret = -EIO;
                                        break;
                                }

                                size = MAX_PENDING_SIZE * 2;
                                ret = uncompress(buffer,
                                                 (unsigned long *)&size, tmp,
                                                 bufsize);
                                if (ret != Z_OK) {
                                        fprintf(stderr, "Error decompressing "
                                                "%d\n", ret);
                                        ret = -EIO;
                                        break;
                                }
                        } else {
                                ret = fread(buffer, bufsize, 1, mdres->in);
                                if (ret != 1) {
                                        fprintf(stderr, "Error reading: %d\n",
                                                errno);
                                        ret = -EIO;
                                        break;
                                }
                                size = bufsize;
                        }
                        ret = 0;

                        /* Does this item cover the block we are after? */
                        if (item_bytenr <= search &&
                            item_bytenr + size > search) {
                                ret = read_chunk_block(mdres, buffer, search,
                                                       item_bytenr, size,
                                                       current_cluster);
                                if (!ret)
                                        ret = 1;
                                break;
                        }
                        bytenr += bufsize;
                }
                /* ret > 0 means the block was found and parsed. */
                if (ret) {
                        if (ret > 0)
                                ret = 0;
                        break;
                }
                /* Clusters are BLOCK_SIZE aligned; skip tail padding. */
                if (bytenr & BLOCK_MASK)
                        bytenr += BLOCK_SIZE - (bytenr & BLOCK_MASK);
                current_cluster = bytenr;
        }

        free(tmp);
        free(buffer);
        free(cluster);
        return ret;
}
2117
/*
 * Prime mdres from the super block and build the in-memory chunk tree
 * used to map logical to physical addresses during restore.
 *
 * Skips over the first cluster's items until the super block item is
 * found, reads (and if needed decompresses) it, copies leafsize, fsid,
 * device uuid and devid into mdres, then walks the chunk root via
 * search_for_chunk_blocks().
 *
 * Returns 0 on success (or when reading from stdin, where seeking back
 * is impossible and the rebuild is skipped), negative on error.
 */
static int build_chunk_tree(struct mdrestore_struct *mdres,
                            struct meta_cluster *cluster)
{
        struct btrfs_super_block *super;
        struct meta_cluster_header *header;
        struct meta_cluster_item *item = NULL;
        u64 chunk_root_bytenr = 0;
        u32 i, nritems;
        u64 bytenr = 0;
        u8 *buffer;
        int ret;

        /* We can't seek with stdin so don't bother doing this */
        if (mdres->in == stdin)
                return 0;

        ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
        if (ret <= 0) {
                fprintf(stderr, "Error reading in cluster: %d\n", errno);
                return -EIO;
        }
        ret = 0;

        header = &cluster->header;
        if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
            le64_to_cpu(header->bytenr) != 0) {
                fprintf(stderr, "bad header in metadump image\n");
                return -EIO;
        }

        /* Seek past every item until we hit the super block. */
        bytenr += BLOCK_SIZE;
        mdres->compress_method = header->compress;
        nritems = le32_to_cpu(header->nritems);
        for (i = 0; i < nritems; i++) {
                item = &cluster->items[i];

                if (le64_to_cpu(item->bytenr) == BTRFS_SUPER_INFO_OFFSET)
                        break;
                bytenr += le32_to_cpu(item->size);
                if (fseek(mdres->in, le32_to_cpu(item->size), SEEK_CUR)) {
                        fprintf(stderr, "Error seeking: %d\n", errno);
                        return -EIO;
                }
        }

        if (!item || le64_to_cpu(item->bytenr) != BTRFS_SUPER_INFO_OFFSET) {
                fprintf(stderr, "Huh, didn't find the super?\n");
                return -EINVAL;
        }

        buffer = malloc(le32_to_cpu(item->size));
        if (!buffer) {
                fprintf(stderr, "Error allocing buffer\n");
                return -ENOMEM;
        }

        ret = fread(buffer, le32_to_cpu(item->size), 1, mdres->in);
        if (ret != 1) {
                fprintf(stderr, "Error reading buffer: %d\n", errno);
                free(buffer);
                return -EIO;
        }

        /* The super block item may itself be zlib compressed. */
        if (mdres->compress_method == COMPRESS_ZLIB) {
                size_t size = MAX_PENDING_SIZE * 2;
                u8 *tmp;

                tmp = malloc(MAX_PENDING_SIZE * 2);
                if (!tmp) {
                        free(buffer);
                        return -ENOMEM;
                }
                ret = uncompress(tmp, (unsigned long *)&size,
                                 buffer, le32_to_cpu(item->size));
                if (ret != Z_OK) {
                        fprintf(stderr, "Error decompressing %d\n", ret);
                        free(buffer);
                        free(tmp);
                        return -EIO;
                }
                free(buffer);
                buffer = tmp;
        }

        super = (struct btrfs_super_block *)buffer;
        chunk_root_bytenr = btrfs_super_chunk_root(super);
        mdres->leafsize = btrfs_super_leafsize(super);
        memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
        memcpy(mdres->uuid, super->dev_item.uuid,
                       BTRFS_UUID_SIZE);
        mdres->devid = le64_to_cpu(super->dev_item.devid);
        free(buffer);

        return search_for_chunk_blocks(mdres, chunk_root_bytenr, 0);
}
2213
2214 static int __restore_metadump(const char *input, FILE *out, int old_restore,
2215                               int num_threads, int fixup_offset,
2216                               const char *target, int multi_devices)
2217 {
2218         struct meta_cluster *cluster = NULL;
2219         struct meta_cluster_header *header;
2220         struct mdrestore_struct mdrestore;
2221         struct btrfs_fs_info *info = NULL;
2222         u64 bytenr = 0;
2223         FILE *in = NULL;
2224         int ret = 0;
2225
2226         if (!strcmp(input, "-")) {
2227                 in = stdin;
2228         } else {
2229                 in = fopen(input, "r");
2230                 if (!in) {
2231                         perror("unable to open metadump image");
2232                         return 1;
2233                 }
2234         }
2235
2236         /* NOTE: open with write mode */
2237         if (fixup_offset) {
2238                 BUG_ON(!target);
2239                 info = open_ctree_fs_info_restore(target, 0, 0, 1, 1);
2240                 if (!info) {
2241                         fprintf(stderr, "%s: open ctree failed\n", __func__);
2242                         ret = -EIO;
2243                         goto failed_open;
2244                 }
2245         }
2246
2247         cluster = malloc(BLOCK_SIZE);
2248         if (!cluster) {
2249                 fprintf(stderr, "Error allocating cluster\n");
2250                 ret = -ENOMEM;
2251                 goto failed_info;
2252         }
2253
2254         ret = mdrestore_init(&mdrestore, in, out, old_restore, num_threads,
2255                              fixup_offset, info, multi_devices);
2256         if (ret) {
2257                 fprintf(stderr, "Error initing mdrestore %d\n", ret);
2258                 goto failed_cluster;
2259         }
2260
2261         if (!multi_devices) {
2262                 ret = build_chunk_tree(&mdrestore, cluster);
2263                 if (ret)
2264                         goto out;
2265         }
2266
2267         if (in != stdin && fseek(in, 0, SEEK_SET)) {
2268                 fprintf(stderr, "Error seeking %d\n", errno);
2269                 goto out;
2270         }
2271
2272         while (1) {
2273                 ret = fread(cluster, BLOCK_SIZE, 1, in);
2274                 if (!ret)
2275                         break;
2276
2277                 header = &cluster->header;
2278                 if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
2279                     le64_to_cpu(header->bytenr) != bytenr) {
2280                         fprintf(stderr, "bad header in metadump image\n");
2281                         ret = -EIO;
2282                         break;
2283                 }
2284                 ret = add_cluster(cluster, &mdrestore, &bytenr);
2285                 if (ret) {
2286                         fprintf(stderr, "Error adding cluster\n");
2287                         break;
2288                 }
2289
2290                 ret = wait_for_worker(&mdrestore);
2291                 if (ret) {
2292                         fprintf(stderr, "One of the threads errored out %d\n",
2293                                 ret);
2294                         break;
2295                 }
2296         }
2297 out:
2298         mdrestore_destroy(&mdrestore);
2299 failed_cluster:
2300         free(cluster);
2301 failed_info:
2302         if (fixup_offset && info)
2303                 close_ctree(info->chunk_root);
2304 failed_open:
2305         if (in != stdin)
2306                 fclose(in);
2307         return ret;
2308 }
2309
/* Plain restore: no offset fixup and no target filesystem. */
static int restore_metadump(const char *input, FILE *out, int old_restore,
			    int num_threads, int multi_devices)
{
	return __restore_metadump(input, out, old_restore, num_threads,
				  0 /* fixup_offset */, NULL /* target */,
				  multi_devices);
}
2316
/* Multi-device fixup pass: remap offsets through 'target' chunk tree. */
static int fixup_metadump(const char *input, FILE *out, int num_threads,
			  const char *target)
{
	return __restore_metadump(input, out, 0 /* old_restore */,
				  num_threads, 1 /* fixup_offset */,
				  target, 1 /* multi_devices */);
}
2322
2323 static int update_disk_super_on_device(struct btrfs_fs_info *info,
2324                                        const char *other_dev, u64 cur_devid)
2325 {
2326         struct btrfs_key key;
2327         struct extent_buffer *leaf;
2328         struct btrfs_path path;
2329         struct btrfs_dev_item *dev_item;
2330         struct btrfs_super_block *disk_super;
2331         char dev_uuid[BTRFS_UUID_SIZE];
2332         char fs_uuid[BTRFS_UUID_SIZE];
2333         u64 devid, type, io_align, io_width;
2334         u64 sector_size, total_bytes, bytes_used;
2335         char *buf;
2336         int fp;
2337         int ret;
2338
2339         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2340         key.type = BTRFS_DEV_ITEM_KEY;
2341         key.offset = cur_devid;
2342
2343         btrfs_init_path(&path);
2344         ret = btrfs_search_slot(NULL, info->chunk_root, &key, &path, 0, 0); 
2345         if (ret) {
2346                 fprintf(stderr, "search key fails\n");
2347                 exit(1);
2348         }
2349
2350         leaf = path.nodes[0];
2351         dev_item = btrfs_item_ptr(leaf, path.slots[0],
2352                                   struct btrfs_dev_item);
2353
2354         devid = btrfs_device_id(leaf, dev_item);
2355         if (devid != cur_devid) {
2356                 printk("devid %llu mismatch with %llu\n", devid, cur_devid);
2357                 exit(1);
2358         }
2359
2360         type = btrfs_device_type(leaf, dev_item);
2361         io_align = btrfs_device_io_align(leaf, dev_item);
2362         io_width = btrfs_device_io_width(leaf, dev_item);
2363         sector_size = btrfs_device_sector_size(leaf, dev_item);
2364         total_bytes = btrfs_device_total_bytes(leaf, dev_item);
2365         bytes_used = btrfs_device_bytes_used(leaf, dev_item);
2366         read_extent_buffer(leaf, dev_uuid, (unsigned long)btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE);
2367         read_extent_buffer(leaf, fs_uuid, (unsigned long)btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE);
2368
2369         btrfs_release_path(info->chunk_root, &path);
2370
2371         printk("update disk super on %s devid=%llu\n", other_dev, devid);
2372
2373         /* update other devices' super block */
2374         fp = open(other_dev, O_CREAT | O_RDWR, 0600);
2375         if (fp < 0) {
2376                 fprintf(stderr, "could not open %s\n", other_dev);
2377                 exit(1);
2378         }
2379
2380         buf = malloc(BTRFS_SUPER_INFO_SIZE);
2381         if (!buf) {
2382                 ret = -ENOMEM;
2383                 exit(1);
2384         }
2385
2386         memcpy(buf, info->super_copy, BTRFS_SUPER_INFO_SIZE);
2387
2388         disk_super = (struct btrfs_super_block *)buf;
2389         dev_item = &disk_super->dev_item;
2390
2391         btrfs_set_stack_device_type(dev_item, type);
2392         btrfs_set_stack_device_id(dev_item, devid);
2393         btrfs_set_stack_device_total_bytes(dev_item, total_bytes);
2394         btrfs_set_stack_device_bytes_used(dev_item, bytes_used);
2395         btrfs_set_stack_device_io_align(dev_item, io_align);
2396         btrfs_set_stack_device_io_width(dev_item, io_width);
2397         btrfs_set_stack_device_sector_size(dev_item, sector_size);
2398         memcpy(dev_item->uuid, dev_uuid, BTRFS_UUID_SIZE);
2399         memcpy(dev_item->fsid, fs_uuid, BTRFS_UUID_SIZE);
2400         csum_block((u8 *)buf, BTRFS_SUPER_INFO_SIZE);
2401
2402         ret = pwrite64(fp, buf, BTRFS_SUPER_INFO_SIZE, BTRFS_SUPER_INFO_OFFSET);
2403         if (ret != BTRFS_SUPER_INFO_SIZE) {
2404                 ret = -EIO;
2405                 goto out;
2406         }
2407
2408         write_backup_supers(fp, (u8 *)buf);
2409
2410 out:
2411         free(buf);
2412         close(fp);
2413         return 0;
2414 }
2415
/*
 * Print usage to stderr and exit with status 1.  Covers every option
 * accepted by the getopt string "rc:t:oswm" in main() — the -m flag
 * (multi-device restore) was previously undocumented here.
 */
static void print_usage(void)
{
	fprintf(stderr, "usage: btrfs-image [options] source target\n");
	fprintf(stderr, "\t-r      \trestore metadump image\n");
	fprintf(stderr, "\t-c value\tcompression level (0 ~ 9)\n");
	fprintf(stderr, "\t-t value\tnumber of threads (1 ~ 32)\n");
	fprintf(stderr, "\t-o      \tdon't mess with the chunk tree when restoring\n");
	fprintf(stderr, "\t-s      \tsanitize file names, use once to just use garbage, use twice if you want crc collisions\n");
	fprintf(stderr, "\t-w      \twalk all trees instead of using extent tree, do this if your extent tree is broken\n");
	fprintf(stderr, "\t-m      \trestore for multiple devices\n");
	exit(1);
}
2427
2428 int main(int argc, char *argv[])
2429 {
2430         char *source;
2431         char *target;
2432         int num_threads = 0;
2433         int compress_level = 0;
2434         int create = 1;
2435         int old_restore = 0;
2436         int walk_trees = 0;
2437         int multi_devices = 0;
2438         int ret;
2439         int sanitize = 0;
2440         int dev_cnt = 0;
2441         FILE *out;
2442
2443         while (1) {
2444                 int c = getopt(argc, argv, "rc:t:oswm");
2445                 if (c < 0)
2446                         break;
2447                 switch (c) {
2448                 case 'r':
2449                         create = 0;
2450                         break;
2451                 case 't':
2452                         num_threads = atoi(optarg);
2453                         if (num_threads <= 0 || num_threads > 32)
2454                                 print_usage();
2455                         break;
2456                 case 'c':
2457                         compress_level = atoi(optarg);
2458                         if (compress_level < 0 || compress_level > 9)
2459                                 print_usage();
2460                         break;
2461                 case 'o':
2462                         old_restore = 1;
2463                         break;
2464                 case 's':
2465                         sanitize++;
2466                         break;
2467                 case 'w':
2468                         walk_trees = 1;
2469                         break;
2470                 case 'm':
2471                         create = 0;
2472                         multi_devices = 1;
2473                         break;
2474                 default:
2475                         print_usage();
2476                 }
2477         }
2478
2479         if ((old_restore) && create)
2480                 print_usage();
2481
2482         argc = argc - optind;
2483         dev_cnt = argc - 1;
2484
2485         if (multi_devices && dev_cnt < 2)
2486                 print_usage();
2487         if (!multi_devices && dev_cnt != 1)
2488                 print_usage();
2489
2490         source = argv[optind];
2491         target = argv[optind + 1];
2492
2493         if (create && !strcmp(target, "-")) {
2494                 out = stdout;
2495         } else {
2496                 out = fopen(target, "w+");
2497                 if (!out) {
2498                         perror("unable to create target file");
2499                         exit(1);
2500                 }
2501         }
2502
2503         if (num_threads == 0 && compress_level > 0) {
2504                 num_threads = sysconf(_SC_NPROCESSORS_ONLN);
2505                 if (num_threads <= 0)
2506                         num_threads = 1;
2507         }
2508
2509         if (create)
2510                 ret = create_metadump(source, out, num_threads,
2511                                       compress_level, sanitize, walk_trees);
2512         else
2513                 ret = restore_metadump(source, out, old_restore, 1,
2514                                        multi_devices);
2515         if (ret) {
2516                 printk("%s failed (%s)\n", (create) ? "create" : "restore",
2517                        strerror(errno));
2518                 goto out;
2519         }
2520
2521          /* extended support for multiple devices */
2522         if (!create && multi_devices) {
2523                 struct btrfs_fs_info *info;
2524                 u64 total_devs;
2525                 int i;
2526
2527                 info = open_ctree_fs_info_restore(target, 0, 0, 0, 1);
2528                 if (!info) {
2529                         int e = errno;
2530                         fprintf(stderr, "unable to open %s error = %s\n",
2531                                 target, strerror(e));
2532                         return 1;
2533                 }
2534
2535                 total_devs = btrfs_super_num_devices(info->super_copy);
2536                 if (total_devs != dev_cnt) {
2537                         printk("it needs %llu devices but has only %d\n",
2538                                 total_devs, dev_cnt);
2539                         close_ctree(info->chunk_root);
2540                         goto out;
2541                 }
2542
2543                 /* update super block on other disks */
2544                 for (i = 2; i <= dev_cnt; i++) {
2545                         ret = update_disk_super_on_device(info,
2546                                         argv[optind + i], (u64)i);
2547                         if (ret) {
2548                                 printk("update disk super failed devid=%d (error=%d)\n",
2549                                         i, ret);
2550                                 close_ctree(info->chunk_root);
2551                                 exit(1);
2552                         }
2553                 }
2554
2555                 close_ctree(info->chunk_root);
2556
2557                 /* fix metadata block to map correct chunk */
2558                 ret = fixup_metadump(source, out, 1, target);
2559                 if (ret) {
2560                         fprintf(stderr, "fix metadump failed (error=%d)\n",
2561                                 ret);
2562                         exit(1);
2563                 }
2564         }
2565
2566 out:
2567         if (out == stdout)
2568                 fflush(out);
2569         else
2570                 fclose(out);
2571
2572         return ret;
2573 }