/*
 * btrfs-progs: delete invalid output file when btrfs-image failed
 * Source: platform/upstream/btrfs-progs.git / btrfs-image.c
 */
1 /*
2  * Copyright (C) 2008 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #define _XOPEN_SOURCE 500
20 #define _GNU_SOURCE 1
21 #include <pthread.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <sys/types.h>
25 #include <sys/stat.h>
26 #include <fcntl.h>
27 #include <unistd.h>
28 #include <dirent.h>
29 #include <zlib.h>
30 #include "kerncompat.h"
31 #include "crc32c.h"
32 #include "ctree.h"
33 #include "disk-io.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "version.h"
37 #include "volumes.h"
38 #include "extent_io.h"
39
40 #define HEADER_MAGIC            0xbd5c25e27295668bULL
41 #define MAX_PENDING_SIZE        (256 * 1024)
42 #define BLOCK_SIZE              1024
43 #define BLOCK_MASK              (BLOCK_SIZE - 1)
44
45 #define COMPRESS_NONE           0
46 #define COMPRESS_ZLIB           1
47
/* On-disk (little-endian, packed) index entry: one dumped extent in a cluster. */
struct meta_cluster_item {
        __le64 bytenr;          /* logical start of the extent in the fs */
        __le32 size;            /* stored (possibly compressed) byte count */
} __attribute__ ((__packed__));
52
/* On-disk (little-endian, packed) header of one cluster in the image stream. */
struct meta_cluster_header {
        __le64 magic;           /* HEADER_MAGIC */
        __le64 bytenr;          /* offset of this cluster in the image */
        __le32 nritems;         /* number of items that follow */
        u8 compress;            /* COMPRESS_NONE or COMPRESS_ZLIB */
} __attribute__ ((__packed__));
59
/*
 * One cluster of the image stream: header + index items, padded to
 * BLOCK_SIZE, followed by the buffers the items describe.
 */
struct meta_cluster {
        struct meta_cluster_header header;
        struct meta_cluster_item items[];       /* flexible array, ITEMS_PER_CLUSTER max */
} __attribute__ ((__packed__));
65
66 #define ITEMS_PER_CLUSTER ((BLOCK_SIZE - sizeof(struct meta_cluster)) / \
67                            sizeof(struct meta_cluster_item))
68
/* Maps a range of logical bytes to a physical location (restore side). */
struct fs_chunk {
        u64 logical;    /* logical start of the range */
        u64 physical;   /* physical start — presumably used by the restore write path; not read in this view */
        u64 bytes;      /* length of the range */
        struct rb_node n;       /* link in mdrestore_struct.chunk_tree */
};
75
/* One run of extents queued for (optional) compression and writeout. */
struct async_work {
        struct list_head list;          /* on metadump_struct.list: compression queue */
        struct list_head ordered;       /* on metadump_struct.ordered: writeout order */
        u64 start;                      /* logical start of the run */
        u64 size;                       /* uncompressed byte count */
        u8 *buffer;                     /* data; replaced by compressed copy in dump_worker */
        size_t bufsize;                 /* bytes currently held in 'buffer' */
        int error;                      /* set when compression failed */
};
85
/* State for creating an image dump from a btrfs filesystem. */
struct metadump_struct {
        struct btrfs_root *root;        /* filesystem being dumped */
        FILE *out;                      /* destination image stream */

        struct meta_cluster *cluster;   /* cluster under construction (BLOCK_SIZE bytes) */

        pthread_t *threads;             /* compression worker pool */
        size_t num_threads;
        pthread_mutex_t mutex;          /* guards list/ordered/num_items/num_ready/done */
        pthread_cond_t cond;            /* signaled when work is queued on 'list' */
        struct rb_root name_tree;       /* cache of sanitized names (struct name) */

        struct list_head list;          /* buffers waiting for a compression worker */
        struct list_head ordered;       /* all buffers of the cluster, in disk order */
        size_t num_items;               /* buffers in the current cluster */
        size_t num_ready;               /* buffers done compressing */

        u64 pending_start;              /* logical start of the pending extent run */
        u64 pending_size;               /* bytes accumulated but not yet flushed */

        int compress_level;             /* zlib level; 0 = store uncompressed */
        int done;                       /* tells workers to exit */
        int data;                       /* pending run is data (1) or metadata (0) */
        int sanitize_names;             /* 1 = random garbage, >1 = crc32c collisions */
};
111
/* Cache entry mapping an original name to its sanitized substitute. */
struct name {
        struct rb_node n;       /* link in metadump_struct.name_tree */
        char *val;              /* original name (not NUL-terminated) */
        char *sub;              /* sanitized replacement of the same length */
        u32 len;                /* length of both buffers */
};
118
/*
 * State for restoring an image stream back into a filesystem image.
 * Several fields are only exercised by the restore path, which is partly
 * outside this view — comments on those are best-effort.
 */
struct mdrestore_struct {
        FILE *in;                       /* image stream being read */
        FILE *out;                      /* restore target */

        pthread_t *threads;             /* worker pool */
        size_t num_threads;
        pthread_mutex_t mutex;          /* guards 'list' and shared counters */
        pthread_cond_t cond;

        struct rb_root chunk_tree;      /* fs_chunk nodes: logical -> physical map */
        struct list_head list;          /* queued work items */
        size_t num_items;
        u32 leafsize;                   /* metadata block size of the image */
        u64 devid;
        u8 uuid[BTRFS_UUID_SIZE];
        u8 fsid[BTRFS_FSID_SIZE];

        int compress_method;            /* COMPRESS_* read from the cluster header */
        int done;                       /* no more clusters to process */
        int error;                      /* sticky first error */
        int old_restore;                /* flag: old-format image restore — see restore path */
        int fixup_offset;               /* flag: remap offsets via chunk_tree — see restore path */
        int multi_devices;              /* flag: image spans multiple devices */
        struct btrfs_fs_info *info;
};
144
145 static void print_usage(void) __attribute__((noreturn));
146 static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
147                                    u64 search, u64 cluster_bytenr);
148 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size);
149
150 static void csum_block(u8 *buf, size_t len)
151 {
152         char result[BTRFS_CRC32_SIZE];
153         u32 crc = ~(u32)0;
154         crc = crc32c(crc, buf + BTRFS_CSUM_SIZE, len - BTRFS_CSUM_SIZE);
155         btrfs_csum_final(crc, result);
156         memcpy(buf, result, BTRFS_CRC32_SIZE);
157 }
158
159 static int has_name(struct btrfs_key *key)
160 {
161         switch (key->type) {
162         case BTRFS_DIR_ITEM_KEY:
163         case BTRFS_DIR_INDEX_KEY:
164         case BTRFS_INODE_REF_KEY:
165         case BTRFS_INODE_EXTREF_KEY:
166         case BTRFS_XATTR_ITEM_KEY:
167                 return 1;
168         default:
169                 break;
170         }
171
172         return 0;
173 }
174
175 static char *generate_garbage(u32 name_len)
176 {
177         char *buf = malloc(name_len);
178         int i;
179
180         if (!buf)
181                 return NULL;
182
183         for (i = 0; i < name_len; i++) {
184                 char c = rand() % 94 + 33;
185
186                 if (c == '/')
187                         c++;
188                 buf[i] = c;
189         }
190
191         return buf;
192 }
193
194 static int name_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
195 {
196         struct name *entry = rb_entry(a, struct name, n);
197         struct name *ins = rb_entry(b, struct name, n);
198         u32 len;
199
200         len = min(ins->len, entry->len);
201         return memcmp(ins->val, entry->val, len);
202 }
203
204 static int chunk_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
205 {
206         struct fs_chunk *entry = rb_entry(a, struct fs_chunk, n);
207         struct fs_chunk *ins = rb_entry(b, struct fs_chunk, n);
208
209         if (fuzz && ins->logical >= entry->logical &&
210             ins->logical < entry->logical + entry->bytes)
211                 return 0;
212
213         if (ins->logical < entry->logical)
214                 return -1;
215         else if (ins->logical > entry->logical)
216                 return 1;
217         return 0;
218 }
219
220 static void tree_insert(struct rb_root *root, struct rb_node *ins,
221                         int (*cmp)(struct rb_node *a, struct rb_node *b,
222                                    int fuzz))
223 {
224         struct rb_node ** p = &root->rb_node;
225         struct rb_node * parent = NULL;
226         int dir;
227
228         while(*p) {
229                 parent = *p;
230
231                 dir = cmp(*p, ins, 0);
232                 if (dir < 0)
233                         p = &(*p)->rb_left;
234                 else if (dir > 0)
235                         p = &(*p)->rb_right;
236                 else
237                         BUG();
238         }
239
240         rb_link_node(ins, parent, p);
241         rb_insert_color(ins, root);
242 }
243
244 static struct rb_node *tree_search(struct rb_root *root,
245                                    struct rb_node *search,
246                                    int (*cmp)(struct rb_node *a,
247                                               struct rb_node *b, int fuzz),
248                                    int fuzz)
249 {
250         struct rb_node *n = root->rb_node;
251         int dir;
252
253         while (n) {
254                 dir = cmp(n, search, fuzz);
255                 if (dir < 0)
256                         n = n->rb_left;
257                 else if (dir > 0)
258                         n = n->rb_right;
259                 else
260                         return n;
261         }
262
263         return NULL;
264 }
265
/*
 * Return a sanitized replacement for @name with the same length and the
 * same crc32c, so crc-keyed directory indexes still resolve after
 * sanitization.
 *
 * Takes ownership of @name: it is either stored in the cache
 * (md->name_tree) or freed.  The returned buffer belongs to the cache
 * and must not be freed by the caller (sanitize_* check
 * md->sanitize_names == 1 before freeing).  Returns NULL on ENOMEM.
 */
static char *find_collision(struct metadump_struct *md, char *name,
                            u32 name_len)
{
        struct name *val;
        struct rb_node *entry;
        struct name tmp;
        unsigned long checksum;
        int found = 0;
        int i;

        /* already sanitized this name once? reuse the cached substitute */
        tmp.val = name;
        tmp.len = name_len;
        entry = tree_search(&md->name_tree, &tmp.n, name_cmp, 0);
        if (entry) {
                val = rb_entry(entry, struct name, n);
                free(name);
                return val->sub;
        }

        val = malloc(sizeof(struct name));
        if (!val) {
                fprintf(stderr, "Couldn't sanitize name, enomem\n");
                free(name);
                return NULL;
        }

        memset(val, 0, sizeof(*val));

        val->val = name;
        val->len = name_len;
        val->sub = malloc(name_len);
        if (!val->sub) {
                fprintf(stderr, "Couldn't sanitize name, enomem\n");
                free(val);
                free(name);
                return NULL;
        }

        /*
         * Brute-force search: treat val->sub as an odometer over the
         * printable characters ' '..127 (skipping '/') and advance it
         * until its crc32c matches the original's while the bytes
         * themselves differ.
         */
        checksum = crc32c(~1, val->val, name_len);
        memset(val->sub, ' ', name_len);
        i = 0;
        while (1) {
                if (crc32c(~1, val->sub, name_len) == checksum &&
                    memcmp(val->sub, val->val, val->len)) {
                        found = 1;
                        break;
                }

                if (val->sub[i] == 127) {
                        /* digit overflow: carry into the next position */
                        do {
                                i++;
                                if (i >= name_len)
                                        break;
                        } while (val->sub[i] == 127);

                        if (i >= name_len)
                                break;  /* search space exhausted */
                        val->sub[i]++;
                        if (val->sub[i] == '/')
                                val->sub[i]++;
                        memset(val->sub, ' ', i);
                        i = 0;
                        continue;
                } else {
                        val->sub[i]++;
                        if (val->sub[i] == '/')
                                val->sub[i]++;
                }
        }

        if (!found) {
                /* no collision exists in the search space; fall back to garbage */
                fprintf(stderr, "Couldn't find a collision for '%.*s', "
                        "generating normal garbage, it won't match indexes\n",
                        val->len, val->val);
                for (i = 0; i < name_len; i++) {
                        char c = rand() % 94 + 33;

                        if (c == '/')
                                c++;
                        val->sub[i] = c;
                }
        }

        tree_insert(&md->name_tree, &val->n, name_cmp);
        return val->sub;
}
352
/*
 * Replace every file name stored in the dir item(s) at @slot of @eb with
 * sanitized bytes of the same length.  With sanitize_names > 1 the
 * replacement is a crc32c collision (cached, not freed here); otherwise
 * it is fresh random garbage that is freed after use.  On ENOMEM the
 * remaining names in the item are left untouched.
 */
static void sanitize_dir_item(struct metadump_struct *md, struct extent_buffer *eb,
                              int slot)
{
        struct btrfs_dir_item *dir_item;
        char *buf;
        char *garbage;
        unsigned long name_ptr;
        u32 total_len;
        u32 cur = 0;
        u32 this_len;
        u32 name_len;
        int free_garbage = (md->sanitize_names == 1);

        dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
        total_len = btrfs_item_size_nr(eb, slot);
        /* an item may pack several dir entries back to back */
        while (cur < total_len) {
                this_len = sizeof(*dir_item) +
                        btrfs_dir_name_len(eb, dir_item) +
                        btrfs_dir_data_len(eb, dir_item);
                name_ptr = (unsigned long)(dir_item + 1);
                name_len = btrfs_dir_name_len(eb, dir_item);

                if (md->sanitize_names > 1) {
                        buf = malloc(name_len);
                        if (!buf) {
                                fprintf(stderr, "Couldn't sanitize name, "
                                        "enomem\n");
                                return;
                        }
                        read_extent_buffer(eb, buf, name_ptr, name_len);
                        /* find_collision takes ownership of buf */
                        garbage = find_collision(md, buf, name_len);
                } else {
                        garbage = generate_garbage(name_len);
                }
                if (!garbage) {
                        fprintf(stderr, "Couldn't sanitize name, enomem\n");
                        return;
                }
                write_extent_buffer(eb, garbage, name_ptr, name_len);
                cur += this_len;
                dir_item = (struct btrfs_dir_item *)((char *)dir_item +
                                                     this_len);
                if (free_garbage)
                        free(garbage);
        }
}
399
/*
 * Sanitize the names embedded in the inode ref (or, with @ext set,
 * extended ref) item at @slot of @eb.  Same replacement policy as
 * sanitize_dir_item(): crc collisions when sanitize_names > 1 (cached,
 * not freed), random garbage otherwise (freed after writing back).
 */
static void sanitize_inode_ref(struct metadump_struct *md,
                               struct extent_buffer *eb, int slot, int ext)
{
        struct btrfs_inode_extref *extref;
        struct btrfs_inode_ref *ref;
        char *garbage, *buf;
        unsigned long ptr;
        unsigned long name_ptr;
        u32 item_size;
        u32 cur_offset = 0;
        int len;
        int free_garbage = (md->sanitize_names == 1);

        item_size = btrfs_item_size_nr(eb, slot);
        ptr = btrfs_item_ptr_offset(eb, slot);
        /* an item may pack several refs back to back */
        while (cur_offset < item_size) {
                if (ext) {
                        extref = (struct btrfs_inode_extref *)(ptr +
                                                               cur_offset);
                        name_ptr = (unsigned long)(&extref->name);
                        len = btrfs_inode_extref_name_len(eb, extref);
                        cur_offset += sizeof(*extref);
                } else {
                        ref = (struct btrfs_inode_ref *)(ptr + cur_offset);
                        len = btrfs_inode_ref_name_len(eb, ref);
                        name_ptr = (unsigned long)(ref + 1);
                        cur_offset += sizeof(*ref);
                }
                cur_offset += len;

                if (md->sanitize_names > 1) {
                        buf = malloc(len);
                        if (!buf) {
                                fprintf(stderr, "Couldn't sanitize name, "
                                        "enomem\n");
                                return;
                        }
                        read_extent_buffer(eb, buf, name_ptr, len);
                        /* find_collision takes ownership of buf */
                        garbage = find_collision(md, buf, len);
                } else {
                        garbage = generate_garbage(len);
                }

                if (!garbage) {
                        fprintf(stderr, "Couldn't sanitize name, enomem\n");
                        return;
                }
                write_extent_buffer(eb, garbage, name_ptr, len);
                if (free_garbage)
                        free(garbage);
        }
}
452
453 static void sanitize_xattr(struct metadump_struct *md,
454                            struct extent_buffer *eb, int slot)
455 {
456         struct btrfs_dir_item *dir_item;
457         unsigned long data_ptr;
458         u32 data_len;
459
460         dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
461         data_len = btrfs_dir_data_len(eb, dir_item);
462
463         data_ptr = (unsigned long)((char *)(dir_item + 1) +
464                                    btrfs_dir_name_len(eb, dir_item));
465         memset_extent_buffer(eb, 0, data_ptr, data_len);
466 }
467
468 static void sanitize_name(struct metadump_struct *md, u8 *dst,
469                           struct extent_buffer *src, struct btrfs_key *key,
470                           int slot)
471 {
472         struct extent_buffer *eb;
473
474         eb = alloc_dummy_eb(src->start, src->len);
475         if (!eb) {
476                 fprintf(stderr, "Couldn't sanitize name, no memory\n");
477                 return;
478         }
479
480         memcpy(eb->data, dst, eb->len);
481
482         switch (key->type) {
483         case BTRFS_DIR_ITEM_KEY:
484         case BTRFS_DIR_INDEX_KEY:
485                 sanitize_dir_item(md, eb, slot);
486                 break;
487         case BTRFS_INODE_REF_KEY:
488                 sanitize_inode_ref(md, eb, slot, 0);
489                 break;
490         case BTRFS_INODE_EXTREF_KEY:
491                 sanitize_inode_ref(md, eb, slot, 1);
492                 break;
493         case BTRFS_XATTR_ITEM_KEY:
494                 sanitize_xattr(md, eb, slot);
495                 break;
496         default:
497                 break;
498         }
499
500         memcpy(dst, eb->data, eb->len);
501         free(eb);
502 }
503
/*
 * Scrub user data from the leaf copied into @dst: zero csum item
 * payloads and inline file extents, and sanitize names when requested.
 * @src is only read; all modifications go to the @dst copy.
 */
static void zero_items(struct metadump_struct *md, u8 *dst,
                       struct extent_buffer *src)
{
        struct btrfs_file_extent_item *fi;
        struct btrfs_item *item;
        struct btrfs_key key;
        u32 nritems = btrfs_header_nritems(src);
        size_t size;
        unsigned long ptr;
        int i, extent_type;

        for (i = 0; i < nritems; i++) {
                item = btrfs_item_nr(i);
                btrfs_item_key_to_cpu(src, &key, i);
                if (key.type == BTRFS_CSUM_ITEM_KEY) {
                        /* checksums of (zeroed) data are useless: wipe them */
                        size = btrfs_item_size_nr(src, i);
                        memset(dst + btrfs_leaf_data(src) +
                               btrfs_item_offset_nr(src, i), 0, size);
                        continue;
                }

                if (md->sanitize_names && has_name(&key)) {
                        sanitize_name(md, dst, src, &key, i);
                        continue;
                }

                if (key.type != BTRFS_EXTENT_DATA_KEY)
                        continue;

                /* only inline extents carry file data inside the leaf */
                fi = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
                extent_type = btrfs_file_extent_type(src, fi);
                if (extent_type != BTRFS_FILE_EXTENT_INLINE)
                        continue;

                ptr = btrfs_file_extent_inline_start(fi);
                size = btrfs_file_extent_inline_item_len(src, item);
                memset(dst + ptr, 0, size);
        }
}
546
/*
 * Copy the tree block @src into @dst, zero everything that carries no
 * structural information (slack between item headers and item data, or
 * past the key pointers in a node), scrub user data via zero_items(),
 * and recompute the block checksum.  The superblock is copied verbatim.
 */
static void copy_buffer(struct metadump_struct *md, u8 *dst,
                        struct extent_buffer *src)
{
        int level;
        size_t size;
        u32 nritems;

        memcpy(dst, src->data, src->len);
        if (src->start == BTRFS_SUPER_INFO_OFFSET)
                return;

        level = btrfs_header_level(src);
        nritems = btrfs_header_nritems(src);

        if (nritems == 0) {
                /* empty block: keep only the header */
                size = sizeof(struct btrfs_header);
                memset(dst + size, 0, src->len - size);
        } else if (level == 0) {
                /* leaf: zero the gap between the item table and item data */
                size = btrfs_leaf_data(src) +
                        btrfs_item_offset_nr(src, nritems - 1) -
                        btrfs_item_nr_offset(nritems);
                memset(dst + btrfs_item_nr_offset(nritems), 0, size);
                zero_items(md, dst, src);
        } else {
                /* node: zero everything past the last key pointer */
                size = offsetof(struct btrfs_node, ptrs) +
                        sizeof(struct btrfs_key_ptr) * nritems;
                memset(dst + size, 0, src->len - size);
        }
        csum_block(dst, src->len);
}
580
/*
 * Compression worker thread: pull async_work items off md->list and,
 * when compression is enabled, replace their buffer with a zlib-deflated
 * copy.  Exits when md->done is set and the queue is drained.
 */
static void *dump_worker(void *data)
{
        struct metadump_struct *md = (struct metadump_struct *)data;
        struct async_work *async;
        int ret;

        while (1) {
                pthread_mutex_lock(&md->mutex);
                while (list_empty(&md->list)) {
                        if (md->done) {
                                pthread_mutex_unlock(&md->mutex);
                                goto out;
                        }
                        pthread_cond_wait(&md->cond, &md->mutex);
                }
                async = list_entry(md->list.next, struct async_work, list);
                list_del_init(&async->list);
                pthread_mutex_unlock(&md->mutex);

                if (md->compress_level > 0) {
                        u8 *orig = async->buffer;

                        async->bufsize = compressBound(async->size);
                        async->buffer = malloc(async->bufsize);
                        if (!async->buffer) {
                                /*
                                 * Allocation failed: keep the original
                                 * uncompressed buffer so it stays valid
                                 * (and freeable) downstream, and flag
                                 * the error instead of handing a NULL
                                 * buffer to compress2().
                                 */
                                async->buffer = orig;
                                async->bufsize = async->size;
                                async->error = 1;
                        } else {
                                ret = compress2(async->buffer,
                                                (unsigned long *)&async->bufsize,
                                                orig, async->size,
                                                md->compress_level);
                                if (ret != Z_OK)
                                        async->error = 1;
                                free(orig);
                        }
                }

                pthread_mutex_lock(&md->mutex);
                md->num_ready++;
                pthread_mutex_unlock(&md->mutex);
        }
out:
        pthread_exit(NULL);
}
623
624 static void meta_cluster_init(struct metadump_struct *md, u64 start)
625 {
626         struct meta_cluster_header *header;
627
628         md->num_items = 0;
629         md->num_ready = 0;
630         header = &md->cluster->header;
631         header->magic = cpu_to_le64(HEADER_MAGIC);
632         header->bytenr = cpu_to_le64(start);
633         header->nritems = cpu_to_le32(0);
634         header->compress = md->compress_level > 0 ?
635                            COMPRESS_ZLIB : COMPRESS_NONE;
636 }
637
/*
 * Stop the worker threads and release everything @md owns.
 * @num_threads is the number of threads actually created — callers pass
 * fewer than md->num_threads when metadump_init() failed part way.
 */
static void metadump_destroy(struct metadump_struct *md, int num_threads)
{
        int i;
        struct rb_node *n;

        /* wake all workers so they see 'done' and exit */
        pthread_mutex_lock(&md->mutex);
        md->done = 1;
        pthread_cond_broadcast(&md->cond);
        pthread_mutex_unlock(&md->mutex);

        for (i = 0; i < num_threads; i++)
                pthread_join(md->threads[i], NULL);

        pthread_cond_destroy(&md->cond);
        pthread_mutex_destroy(&md->mutex);

        /* drain the sanitized-name cache */
        while ((n = rb_first(&md->name_tree))) {
                struct name *name;

                name = rb_entry(n, struct name, n);
                rb_erase(n, &md->name_tree);
                free(name->val);
                free(name->sub);
                free(name);
        }
        free(md->threads);
        free(md->cluster);
}
666
667 static int metadump_init(struct metadump_struct *md, struct btrfs_root *root,
668                          FILE *out, int num_threads, int compress_level,
669                          int sanitize_names)
670 {
671         int i, ret = 0;
672
673         memset(md, 0, sizeof(*md));
674         pthread_cond_init(&md->cond, NULL);
675         pthread_mutex_init(&md->mutex, NULL);
676         INIT_LIST_HEAD(&md->list);
677         INIT_LIST_HEAD(&md->ordered);
678         md->root = root;
679         md->out = out;
680         md->pending_start = (u64)-1;
681         md->compress_level = compress_level;
682         md->cluster = calloc(1, BLOCK_SIZE);
683         md->sanitize_names = sanitize_names;
684         if (sanitize_names > 1)
685                 crc32c_optimization_init();
686
687         if (!md->cluster) {
688                 pthread_cond_destroy(&md->cond);
689                 pthread_mutex_destroy(&md->mutex);
690                 return -ENOMEM;
691         }
692
693         meta_cluster_init(md, 0);
694         if (!num_threads)
695                 return 0;
696
697         md->name_tree.rb_node = NULL;
698         md->num_threads = num_threads;
699         md->threads = calloc(num_threads, sizeof(pthread_t));
700         if (!md->threads) {
701                 free(md->cluster);
702                 pthread_cond_destroy(&md->cond);
703                 pthread_mutex_destroy(&md->mutex);
704                 return -ENOMEM;
705         }
706
707         for (i = 0; i < num_threads; i++) {
708                 ret = pthread_create(md->threads + i, NULL, dump_worker, md);
709                 if (ret)
710                         break;
711         }
712
713         if (ret)
714                 metadump_destroy(md, i + 1);
715
716         return ret;
717 }
718
719 static int write_zero(FILE *out, size_t size)
720 {
721         static char zero[BLOCK_SIZE];
722         return fwrite(zero, size, 1, out);
723 }
724
/*
 * Flush the current cluster to md->out: wait for the compression
 * workers to finish, write the BLOCK_SIZE index block, then every
 * buffer in disk order, zero-padding the tail to a BLOCK_SIZE boundary.
 * On return *next holds the image offset where the next cluster starts.
 *
 * Called with md->mutex held; the lock is dropped and re-taken around
 * the sleep while waiting for workers.
 */
static int write_buffers(struct metadump_struct *md, u64 *next)
{
        struct meta_cluster_header *header = &md->cluster->header;
        struct meta_cluster_item *item;
        struct async_work *async;
        u64 bytenr = 0;
        u32 nritems = 0;
        int ret;
        int err = 0;

        if (list_empty(&md->ordered))
                goto out;

        /* wait until all buffers are compressed */
        while (md->num_items > md->num_ready) {
                struct timespec ts = {
                        .tv_sec = 0,
                        .tv_nsec = 10000000,
                };
                pthread_mutex_unlock(&md->mutex);
                nanosleep(&ts, NULL);
                pthread_mutex_lock(&md->mutex);
        }

        /* setup and write index block */
        list_for_each_entry(async, &md->ordered, ordered) {
                item = md->cluster->items + nritems;
                item->bytenr = cpu_to_le64(async->start);
                item->size = cpu_to_le32(async->bufsize);
                nritems++;
        }
        header->nritems = cpu_to_le32(nritems);

        ret = fwrite(md->cluster, BLOCK_SIZE, 1, md->out);
        if (ret != 1) {
                fprintf(stderr, "Error writing out cluster: %d\n", errno);
                return -EIO;
        }

        /* write buffers */
        bytenr += le64_to_cpu(header->bytenr) + BLOCK_SIZE;
        while (!list_empty(&md->ordered)) {
                async = list_entry(md->ordered.next, struct async_work,
                                   ordered);
                list_del_init(&async->ordered);

                /* keep consuming (and freeing) buffers even after an error */
                bytenr += async->bufsize;
                if (!err)
                        ret = fwrite(async->buffer, async->bufsize, 1,
                                     md->out);
                /*
                 * NOTE(review): once err is set, ret keeps its forced 0,
                 * so this branch re-reports the error with a stale errno
                 * on every remaining buffer.
                 */
                if (ret != 1) {
                        err = -EIO;
                        ret = 0;
                        fprintf(stderr, "Error writing out cluster: %d\n",
                                errno);
                }

                free(async->buffer);
                free(async);
        }

        /* zero unused space in the last block */
        if (!err && bytenr & BLOCK_MASK) {
                size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);

                bytenr += size;
                ret = write_zero(md->out, size);
                if (ret != 1) {
                        fprintf(stderr, "Error zeroing out buffer: %d\n",
                                errno);
                        err = -EIO;
                }
        }
out:
        *next = bytenr;
        return err;
}
802
803 static int read_data_extent(struct metadump_struct *md,
804                             struct async_work *async)
805 {
806         struct btrfs_multi_bio *multi = NULL;
807         struct btrfs_device *device;
808         u64 bytes_left = async->size;
809         u64 logical = async->start;
810         u64 offset = 0;
811         u64 bytenr;
812         u64 read_len;
813         ssize_t done;
814         int fd;
815         int ret;
816
817         while (bytes_left) {
818                 read_len = bytes_left;
819                 ret = btrfs_map_block(&md->root->fs_info->mapping_tree, READ,
820                                       logical, &read_len, &multi, 0, NULL);
821                 if (ret) {
822                         fprintf(stderr, "Couldn't map data block %d\n", ret);
823                         return ret;
824                 }
825
826                 device = multi->stripes[0].dev;
827
828                 if (device->fd == 0) {
829                         fprintf(stderr,
830                                 "Device we need to read from is not open\n");
831                         free(multi);
832                         return -EIO;
833                 }
834                 fd = device->fd;
835                 bytenr = multi->stripes[0].physical;
836                 free(multi);
837
838                 read_len = min(read_len, bytes_left);
839                 done = pread64(fd, async->buffer+offset, read_len, bytenr);
840                 if (done < read_len) {
841                         if (done < 0)
842                                 fprintf(stderr, "Error reading extent %d\n",
843                                         errno);
844                         else
845                                 fprintf(stderr, "Short read\n");
846                         return -EIO;
847                 }
848
849                 bytes_left -= done;
850                 offset += done;
851                 logical += done;
852         }
853
854         return 0;
855 }
856
/*
 * Package the pending extent run into an async_work item: data extents
 * are read straight from disk, metadata is read block by block and
 * scrubbed through copy_buffer().  The item is queued for compression
 * (or counted ready when compression is off), and a full cluster is
 * written out once ITEMS_PER_CLUSTER items accumulated or @done is set.
 *
 * Returns 0 or a negative error; on error the allocated item is freed.
 */
static int flush_pending(struct metadump_struct *md, int done)
{
        struct async_work *async = NULL;
        struct extent_buffer *eb;
        u64 blocksize = md->root->nodesize;
        u64 start;
        u64 size;
        size_t offset;
        int ret = 0;

        if (md->pending_size) {
                async = calloc(1, sizeof(*async));
                if (!async)
                        return -ENOMEM;

                async->start = md->pending_start;
                async->size = md->pending_size;
                async->bufsize = async->size;
                async->buffer = malloc(async->bufsize);
                if (!async->buffer) {
                        free(async);
                        return -ENOMEM;
                }
                offset = 0;
                start = async->start;
                size = async->size;

                if (md->data) {
                        ret = read_data_extent(md, async);
                        if (ret) {
                                free(async->buffer);
                                free(async);
                                return ret;
                        }
                }

                /* metadata: copy block by block, scrubbing as we go */
                while (!md->data && size > 0) {
                        u64 this_read = min(blocksize, size);
                        eb = read_tree_block(md->root, start, this_read, 0);
                        if (!eb) {
                                free(async->buffer);
                                free(async);
                                fprintf(stderr,
                                        "Error reading metadata block\n");
                                return -EIO;
                        }
                        copy_buffer(md, async->buffer + offset, eb);
                        free_extent_buffer(eb);
                        start += this_read;
                        offset += this_read;
                        size -= this_read;
                }

                md->pending_start = (u64)-1;
                md->pending_size = 0;
        } else if (!done) {
                /* nothing pending and not finishing: nothing to do */
                return 0;
        }

        pthread_mutex_lock(&md->mutex);
        if (async) {
                list_add_tail(&async->ordered, &md->ordered);
                md->num_items++;
                if (md->compress_level > 0) {
                        /* hand the buffer to a compression worker */
                        list_add_tail(&async->list, &md->list);
                        pthread_cond_signal(&md->cond);
                } else {
                        md->num_ready++;
                }
        }
        if (md->num_items >= ITEMS_PER_CLUSTER || done) {
                /* write_buffers() stores the next cluster offset in start */
                ret = write_buffers(md, &start);
                if (ret)
                        fprintf(stderr, "Error writing buffers %d\n",
                                errno);
                else
                        meta_cluster_init(md, start);
        }
        pthread_mutex_unlock(&md->mutex);
        return ret;
}
938
939 static int add_extent(u64 start, u64 size, struct metadump_struct *md,
940                       int data)
941 {
942         int ret;
943         if (md->data != data ||
944             md->pending_size + size > MAX_PENDING_SIZE ||
945             md->pending_start + md->pending_size != start) {
946                 ret = flush_pending(md, 0);
947                 if (ret)
948                         return ret;
949                 md->pending_start = start;
950         }
951         readahead_tree_block(md->root, start, size, 0);
952         md->pending_size += size;
953         md->data = data;
954         return 0;
955 }
956
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Decide whether the extent at @bytenr is a tree block by inspecting
 * its v0 reference items (old extent tree format where the extent item
 * itself carries no flags).
 *
 * @path must point at the extent item for @bytenr; the slots after it
 * are scanned for BTRFS_EXTENT_REF_V0_KEY items with the same objectid.
 *
 * Returns 1 when a ref owned by a tree (ref objectid below
 * BTRFS_FIRST_FREE_OBJECTID) is found, 0 when the extent looks like
 * data, or a negative errno if walking the tree fails.
 */
static int is_tree_block(struct btrfs_root *extent_root,
                         struct btrfs_path *path, u64 bytenr)
{
        struct extent_buffer *leaf;
        struct btrfs_key key;
        u64 ref_objectid;
        int ret;

        leaf = path->nodes[0];
        while (1) {
                struct btrfs_extent_ref_v0 *ref_item;

                /* advance to the item following the extent item */
                path->slots[0]++;
                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(extent_root, path);
                        if (ret < 0)
                                return ret;
                        if (ret > 0)
                                break;
                        leaf = path->nodes[0];
                }
                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                /* ran past the items belonging to this extent */
                if (key.objectid != bytenr)
                        break;
                if (key.type != BTRFS_EXTENT_REF_V0_KEY)
                        continue;
                ref_item = btrfs_item_ptr(leaf, path->slots[0],
                                          struct btrfs_extent_ref_v0);
                ref_objectid = btrfs_ref_objectid_v0(leaf, ref_item);
                /* low objectids belong to trees, not files */
                if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID)
                        return 1;
                break;
        }
        return 0;
}
#endif
993
994 static int copy_tree_blocks(struct btrfs_root *root, struct extent_buffer *eb,
995                             struct metadump_struct *metadump, int root_tree)
996 {
997         struct extent_buffer *tmp;
998         struct btrfs_root_item *ri;
999         struct btrfs_key key;
1000         u64 bytenr;
1001         int level;
1002         int nritems = 0;
1003         int i = 0;
1004         int ret;
1005
1006         ret = add_extent(btrfs_header_bytenr(eb), root->leafsize, metadump, 0);
1007         if (ret) {
1008                 fprintf(stderr, "Error adding metadata block\n");
1009                 return ret;
1010         }
1011
1012         if (btrfs_header_level(eb) == 0 && !root_tree)
1013                 return 0;
1014
1015         level = btrfs_header_level(eb);
1016         nritems = btrfs_header_nritems(eb);
1017         for (i = 0; i < nritems; i++) {
1018                 if (level == 0) {
1019                         btrfs_item_key_to_cpu(eb, &key, i);
1020                         if (key.type != BTRFS_ROOT_ITEM_KEY)
1021                                 continue;
1022                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
1023                         bytenr = btrfs_disk_root_bytenr(eb, ri);
1024                         tmp = read_tree_block(root, bytenr, root->leafsize, 0);
1025                         if (!tmp) {
1026                                 fprintf(stderr,
1027                                         "Error reading log root block\n");
1028                                 return -EIO;
1029                         }
1030                         ret = copy_tree_blocks(root, tmp, metadump, 0);
1031                         free_extent_buffer(tmp);
1032                         if (ret)
1033                                 return ret;
1034                 } else {
1035                         bytenr = btrfs_node_blockptr(eb, i);
1036                         tmp = read_tree_block(root, bytenr, root->leafsize, 0);
1037                         if (!tmp) {
1038                                 fprintf(stderr, "Error reading log block\n");
1039                                 return -EIO;
1040                         }
1041                         ret = copy_tree_blocks(root, tmp, metadump, root_tree);
1042                         free_extent_buffer(tmp);
1043                         if (ret)
1044                                 return ret;
1045                 }
1046         }
1047
1048         return 0;
1049 }
1050
1051 static int copy_log_trees(struct btrfs_root *root,
1052                           struct metadump_struct *metadump,
1053                           struct btrfs_path *path)
1054 {
1055         u64 blocknr = btrfs_super_log_root(root->fs_info->super_copy);
1056
1057         if (blocknr == 0)
1058                 return 0;
1059
1060         if (!root->fs_info->log_root_tree ||
1061             !root->fs_info->log_root_tree->node) {
1062                 fprintf(stderr, "Error copying tree log, it wasn't setup\n");
1063                 return -EIO;
1064         }
1065
1066         return copy_tree_blocks(root, root->fs_info->log_root_tree->node,
1067                                 metadump, 1);
1068 }
1069
1070 static int copy_space_cache(struct btrfs_root *root,
1071                             struct metadump_struct *metadump,
1072                             struct btrfs_path *path)
1073 {
1074         struct extent_buffer *leaf;
1075         struct btrfs_file_extent_item *fi;
1076         struct btrfs_key key;
1077         u64 bytenr, num_bytes;
1078         int ret;
1079
1080         root = root->fs_info->tree_root;
1081
1082         key.objectid = 0;
1083         key.type = BTRFS_EXTENT_DATA_KEY;
1084         key.offset = 0;
1085
1086         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1087         if (ret < 0) {
1088                 fprintf(stderr, "Error searching for free space inode %d\n",
1089                         ret);
1090                 return ret;
1091         }
1092
1093         leaf = path->nodes[0];
1094
1095         while (1) {
1096                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1097                         ret = btrfs_next_leaf(root, path);
1098                         if (ret < 0) {
1099                                 fprintf(stderr, "Error going to next leaf "
1100                                         "%d\n", ret);
1101                                 return ret;
1102                         }
1103                         if (ret > 0)
1104                                 break;
1105                         leaf = path->nodes[0];
1106                 }
1107
1108                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1109                 if (key.type != BTRFS_EXTENT_DATA_KEY) {
1110                         path->slots[0]++;
1111                         continue;
1112                 }
1113
1114                 fi = btrfs_item_ptr(leaf, path->slots[0],
1115                                     struct btrfs_file_extent_item);
1116                 if (btrfs_file_extent_type(leaf, fi) !=
1117                     BTRFS_FILE_EXTENT_REG) {
1118                         path->slots[0]++;
1119                         continue;
1120                 }
1121
1122                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1123                 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
1124                 ret = add_extent(bytenr, num_bytes, metadump, 1);
1125                 if (ret) {
1126                         fprintf(stderr, "Error adding space cache blocks %d\n",
1127                                 ret);
1128                         btrfs_release_path(path);
1129                         return ret;
1130                 }
1131                 path->slots[0]++;
1132         }
1133
1134         return 0;
1135 }
1136
/*
 * Walk the extent tree and queue every tree block for dumping.
 *
 * Scanning starts just past the first super block.  For current
 * filesystems the BTRFS_EXTENT_FLAG_TREE_BLOCK flag in the extent item
 * identifies metadata; small (v0-format) items need is_tree_block()
 * from the compat code, without which such a filesystem cannot be
 * dumped.
 *
 * Returns 0 on success or a negative errno.
 */
static int copy_from_extent_tree(struct metadump_struct *metadump,
                                 struct btrfs_path *path)
{
        struct btrfs_root *extent_root;
        struct extent_buffer *leaf;
        struct btrfs_extent_item *ei;
        struct btrfs_key key;
        u64 bytenr;
        u64 num_bytes;
        int ret;

        extent_root = metadump->root->fs_info->extent_root;
        /* skip the primary super block, it is added separately */
        bytenr = BTRFS_SUPER_INFO_OFFSET + 4096;
        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = 0;

        ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
        if (ret < 0) {
                fprintf(stderr, "Error searching extent root %d\n", ret);
                return ret;
        }
        ret = 0;

        leaf = path->nodes[0];

        while (1) {
                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(extent_root, path);
                        if (ret < 0) {
                                fprintf(stderr, "Error going to next leaf %d"
                                        "\n", ret);
                                break;
                        }
                        if (ret > 0) {
                                ret = 0;
                                break;
                        }
                        leaf = path->nodes[0];
                }

                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                /* skip stale slots and non-extent items */
                if (key.objectid < bytenr ||
                    (key.type != BTRFS_EXTENT_ITEM_KEY &&
                     key.type != BTRFS_METADATA_ITEM_KEY)) {
                        path->slots[0]++;
                        continue;
                }

                bytenr = key.objectid;
                /* skinny metadata items store the level, not the size */
                if (key.type == BTRFS_METADATA_ITEM_KEY)
                        num_bytes = extent_root->leafsize;
                else
                        num_bytes = key.offset;

                if (btrfs_item_size_nr(leaf, path->slots[0]) > sizeof(*ei)) {
                        ei = btrfs_item_ptr(leaf, path->slots[0],
                                            struct btrfs_extent_item);
                        if (btrfs_extent_flags(leaf, ei) &
                            BTRFS_EXTENT_FLAG_TREE_BLOCK) {
                                ret = add_extent(bytenr, num_bytes, metadump,
                                                 0);
                                if (ret) {
                                        fprintf(stderr, "Error adding block "
                                                "%d\n", ret);
                                        break;
                                }
                        }
                } else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
                        /* v0 item has no flags: classify via its refs */
                        ret = is_tree_block(extent_root, path, bytenr);
                        if (ret < 0) {
                                fprintf(stderr, "Error checking tree block "
                                        "%d\n", ret);
                                break;
                        }

                        if (ret) {
                                ret = add_extent(bytenr, num_bytes, metadump,
                                                 0);
                                if (ret) {
                                        fprintf(stderr, "Error adding block "
                                                "%d\n", ret);
                                        break;
                                }
                        }
                        ret = 0;
#else
                        fprintf(stderr, "Either extent tree corruption or "
                                "you haven't built with V0 support\n");
                        ret = -EIO;
                        break;
#endif
                }
                bytenr += num_bytes;
        }

        btrfs_release_path(path);

        return ret;
}
1238
/*
 * Top-level dump driver: open the filesystem at @input and stream a
 * metadump image to @out.
 *
 * @num_threads and @compress_level configure the compression workers,
 * @sanitize scrubs file names from the dumped metadata, and
 * @walk_trees selects walking the chunk and tree roots directly
 * instead of scanning the extent tree.
 *
 * Returns 0 on success or a negative errno.  flush_pending() and
 * metadump_destroy() run even on error so the worker threads shut
 * down cleanly and queued clusters are written out.
 */
static int create_metadump(const char *input, FILE *out, int num_threads,
                           int compress_level, int sanitize, int walk_trees)
{
        struct btrfs_root *root;
        struct btrfs_path *path = NULL;
        struct metadump_struct metadump;
        int ret;
        int err = 0;

        root = open_ctree(input, 0, 0);
        if (!root) {
                fprintf(stderr, "Open ctree failed\n");
                return -EIO;
        }

        /* the dump code assumes a single metadata block size */
        BUG_ON(root->nodesize != root->leafsize);

        ret = metadump_init(&metadump, root, out, num_threads,
                            compress_level, sanitize);
        if (ret) {
                fprintf(stderr, "Error initing metadump %d\n", ret);
                close_ctree(root);
                return ret;
        }

        /* the primary super block is always the first extent dumped */
        ret = add_extent(BTRFS_SUPER_INFO_OFFSET, 4096, &metadump, 0);
        if (ret) {
                fprintf(stderr, "Error adding metadata %d\n", ret);
                err = ret;
                goto out;
        }

        path = btrfs_alloc_path();
        if (!path) {
                fprintf(stderr, "Out of memory allocing path\n");
                err = -ENOMEM;
                goto out;
        }

        if (walk_trees) {
                /* walk the trees themselves (usable without extent tree) */
                ret = copy_tree_blocks(root, root->fs_info->chunk_root->node,
                                       &metadump, 1);
                if (ret) {
                        err = ret;
                        goto out;
                }

                ret = copy_tree_blocks(root, root->fs_info->tree_root->node,
                                       &metadump, 1);
                if (ret) {
                        err = ret;
                        goto out;
                }
        } else {
                ret = copy_from_extent_tree(&metadump, path);
                if (ret) {
                        err = ret;
                        goto out;
                }
        }

        ret = copy_log_trees(root, &metadump, path);
        if (ret) {
                err = ret;
                goto out;
        }

        ret = copy_space_cache(root, &metadump, path);
out:
        /* always flush: drains the queue even after an earlier error */
        ret = flush_pending(&metadump, 1);
        if (ret) {
                if (!err)
                        err = ret;
                fprintf(stderr, "Error flushing pending %d\n", ret);
        }

        metadump_destroy(&metadump, num_threads);

        btrfs_free_path(path);
        ret = close_ctree(root);
        /* the first error wins; otherwise report close_ctree's status */
        return err ? err : ret;
}
1321
1322 static void update_super_old(u8 *buffer)
1323 {
1324         struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
1325         struct btrfs_chunk *chunk;
1326         struct btrfs_disk_key *key;
1327         u32 sectorsize = btrfs_super_sectorsize(super);
1328         u64 flags = btrfs_super_flags(super);
1329
1330         flags |= BTRFS_SUPER_FLAG_METADUMP;
1331         btrfs_set_super_flags(super, flags);
1332
1333         key = (struct btrfs_disk_key *)(super->sys_chunk_array);
1334         chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
1335                                        sizeof(struct btrfs_disk_key));
1336
1337         btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
1338         btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
1339         btrfs_set_disk_key_offset(key, 0);
1340
1341         btrfs_set_stack_chunk_length(chunk, (u64)-1);
1342         btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
1343         btrfs_set_stack_chunk_stripe_len(chunk, BTRFS_STRIPE_LEN);
1344         btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
1345         btrfs_set_stack_chunk_io_align(chunk, sectorsize);
1346         btrfs_set_stack_chunk_io_width(chunk, sectorsize);
1347         btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
1348         btrfs_set_stack_chunk_num_stripes(chunk, 1);
1349         btrfs_set_stack_chunk_sub_stripes(chunk, 0);
1350         chunk->stripe.devid = super->dev_item.devid;
1351         btrfs_set_stack_stripe_offset(&chunk->stripe, 0);
1352         memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
1353         btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
1354         csum_block(buffer, 4096);
1355 }
1356
/*
 * Rewrite the super block's sys chunk array for restore onto a single
 * device: each chunk item is collapsed to a single SYSTEM stripe on
 * the restore device.  Because the items shrink, the surviving
 * key/chunk pairs are compacted toward the start of the array.
 *
 * Returns 0 on success, -EIO when the array holds a non-chunk key.
 */
static int update_super(u8 *buffer)
{
        struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
        struct btrfs_chunk *chunk;
        struct btrfs_disk_key *disk_key;
        struct btrfs_key key;
        u32 new_array_size = 0;
        u32 array_size;
        u32 cur = 0;
        u8 *ptr, *write_ptr;
        int old_num_stripes;

        /* ptr reads the old entries, write_ptr emits the compacted ones */
        write_ptr = ptr = super->sys_chunk_array;
        array_size = btrfs_super_sys_array_size(super);

        while (cur < array_size) {
                disk_key = (struct btrfs_disk_key *)ptr;
                btrfs_disk_key_to_cpu(&key, disk_key);

                new_array_size += sizeof(*disk_key);
                memmove(write_ptr, ptr, sizeof(*disk_key));

                write_ptr += sizeof(*disk_key);
                ptr += sizeof(*disk_key);
                cur += sizeof(*disk_key);

                if (key.type == BTRFS_CHUNK_ITEM_KEY) {
                        chunk = (struct btrfs_chunk *)ptr;
                        old_num_stripes = btrfs_stack_chunk_num_stripes(chunk);
                        chunk = (struct btrfs_chunk *)write_ptr;

                        /* keep only the first stripe, drop the RAID profile */
                        memmove(write_ptr, ptr, sizeof(*chunk));
                        btrfs_set_stack_chunk_num_stripes(chunk, 1);
                        btrfs_set_stack_chunk_sub_stripes(chunk, 0);
                        btrfs_set_stack_chunk_type(chunk,
                                                   BTRFS_BLOCK_GROUP_SYSTEM);
                        chunk->stripe.devid = super->dev_item.devid;
                        memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid,
                               BTRFS_UUID_SIZE);
                        new_array_size += sizeof(*chunk);
                } else {
                        fprintf(stderr, "Bogus key in the sys chunk array "
                                "%d\n", key.type);
                        return -EIO;
                }
                /*
                 * The destination advances by the truncated single-stripe
                 * size, the source by the original (possibly larger) item.
                 */
                write_ptr += sizeof(*chunk);
                ptr += btrfs_chunk_item_size(old_num_stripes);
                cur += btrfs_chunk_item_size(old_num_stripes);
        }

        btrfs_set_super_sys_array_size(super, new_array_size);
        csum_block(buffer, 4096);

        return 0;
}
1412
1413 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size)
1414 {
1415         struct extent_buffer *eb;
1416
1417         eb = malloc(sizeof(struct extent_buffer) + size);
1418         if (!eb)
1419                 return NULL;
1420         memset(eb, 0, sizeof(struct extent_buffer) + size);
1421
1422         eb->start = bytenr;
1423         eb->len = size;
1424         return eb;
1425 }
1426
/*
 * Shrink the item at @slot in leaf @eb to @new_size bytes in place, a
 * userspace counterpart of the kernel's btrfs_truncate_item().
 *
 * Item data grows downward from the end of the leaf, so every item
 * from @slot onward has its data shifted toward the end by the size
 * difference and its recorded offset bumped to match.
 */
static void truncate_item(struct extent_buffer *eb, int slot, u32 new_size)
{
        struct btrfs_item *item;
        u32 nritems;
        u32 old_size;
        u32 old_data_start;
        u32 size_diff;
        u32 data_end;
        int i;

        old_size = btrfs_item_size_nr(eb, slot);
        if (old_size == new_size)
                return;

        nritems = btrfs_header_nritems(eb);
        /* the last item's data starts at the lowest offset in the leaf */
        data_end = btrfs_item_offset_nr(eb, nritems - 1);

        old_data_start = btrfs_item_offset_nr(eb, slot);
        size_diff = old_size - new_size;

        /* raise the offsets of all items whose data will be moved */
        for (i = slot; i < nritems; i++) {
                u32 ioff;
                item = btrfs_item_nr(i);
                ioff = btrfs_item_offset(eb, item);
                btrfs_set_item_offset(eb, item, ioff + size_diff);
        }

        /* slide the data blob over the freed space */
        memmove_extent_buffer(eb, btrfs_leaf_data(eb) + data_end + size_diff,
                              btrfs_leaf_data(eb) + data_end,
                              old_data_start + new_size - data_end);
        item = btrfs_item_nr(slot);
        btrfs_set_item_size(eb, item, new_size);
}
1460
/*
 * Patch restored copies of chunk tree leaves inside the buffer
 * [async->start, async->start + size): every chunk item is truncated
 * to a single stripe on the restore device with the RAID profile
 * masked off, then the block checksum is recomputed.
 *
 * Buffers whose size is not a multiple of the leafsize carry data, not
 * metadata, and are returned untouched.  Blocks failing the
 * bytenr/fsid sanity checks end the scan early (the rest of the buffer
 * is left as-is).
 *
 * Returns 0 on success or -ENOMEM.
 */
static int fixup_chunk_tree_block(struct mdrestore_struct *mdres,
                                  struct async_work *async, u8 *buffer,
                                  size_t size)
{
        struct extent_buffer *eb;
        size_t size_left = size;
        u64 bytenr = async->start;
        int i;

        /* not metadata-sized: nothing to fix up */
        if (size_left % mdres->leafsize)
                return 0;

        eb = alloc_dummy_eb(bytenr, mdres->leafsize);
        if (!eb)
                return -ENOMEM;

        while (size_left) {
                eb->start = bytenr;
                memcpy(eb->data, buffer, mdres->leafsize);

                /* sanity: the copy must look like a block at bytenr */
                if (btrfs_header_bytenr(eb) != bytenr)
                        break;
                if (memcmp(mdres->fsid,
                           eb->data + offsetof(struct btrfs_header, fsid),
                           BTRFS_FSID_SIZE))
                        break;

                /* only chunk tree leaves need rewriting */
                if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID)
                        goto next;

                if (btrfs_header_level(eb) != 0)
                        goto next;

                for (i = 0; i < btrfs_header_nritems(eb); i++) {
                        struct btrfs_chunk chunk;
                        struct btrfs_key key;
                        u64 type;

                        btrfs_item_key_to_cpu(eb, &key, i);
                        if (key.type != BTRFS_CHUNK_ITEM_KEY)
                                continue;
                        /* drop all stripes but the first from the item */
                        truncate_item(eb, i, sizeof(chunk));
                        read_extent_buffer(eb, &chunk,
                                           btrfs_item_ptr_offset(eb, i),
                                           sizeof(chunk));

                        /* Zero out the RAID profile */
                        type = btrfs_stack_chunk_type(&chunk);
                        type &= (BTRFS_BLOCK_GROUP_DATA |
                                 BTRFS_BLOCK_GROUP_SYSTEM |
                                 BTRFS_BLOCK_GROUP_METADATA |
                                 BTRFS_BLOCK_GROUP_DUP);
                        btrfs_set_stack_chunk_type(&chunk, type);

                        btrfs_set_stack_chunk_num_stripes(&chunk, 1);
                        btrfs_set_stack_chunk_sub_stripes(&chunk, 0);
                        btrfs_set_stack_stripe_devid(&chunk.stripe, mdres->devid);
                        memcpy(chunk.stripe.dev_uuid, mdres->uuid,
                               BTRFS_UUID_SIZE);
                        write_extent_buffer(eb, &chunk,
                                            btrfs_item_ptr_offset(eb, i),
                                            sizeof(chunk));
                }
                /* copy the edited leaf back and refresh its checksum */
                memcpy(buffer, eb->data, eb->len);
                csum_block(buffer, eb->len);
next:
                size_left -= mdres->leafsize;
                buffer += mdres->leafsize;
                bytenr += mdres->leafsize;
        }

        free(eb);
        return 0;
}
1535
1536 static void write_backup_supers(int fd, u8 *buf)
1537 {
1538         struct btrfs_super_block *super = (struct btrfs_super_block *)buf;
1539         struct stat st;
1540         u64 size;
1541         u64 bytenr;
1542         int i;
1543         int ret;
1544
1545         if (fstat(fd, &st)) {
1546                 fprintf(stderr, "Couldn't stat restore point, won't be able "
1547                         "to write backup supers: %d\n", errno);
1548                 return;
1549         }
1550
1551         size = btrfs_device_size(fd, &st);
1552
1553         for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1554                 bytenr = btrfs_sb_offset(i);
1555                 if (bytenr + 4096 > size)
1556                         break;
1557                 btrfs_set_super_bytenr(super, bytenr);
1558                 csum_block(buf, 4096);
1559                 ret = pwrite64(fd, buf, 4096, bytenr);
1560                 if (ret < 4096) {
1561                         if (ret < 0)
1562                                 fprintf(stderr, "Problem writing out backup "
1563                                         "super block %d, err %d\n", i, errno);
1564                         else
1565                                 fprintf(stderr, "Short write writing out "
1566                                         "backup super block\n");
1567                         break;
1568                 }
1569         }
1570 }
1571
1572 static u64 logical_to_physical(struct mdrestore_struct *mdres, u64 logical, u64 *size)
1573 {
1574         struct fs_chunk *fs_chunk;
1575         struct rb_node *entry;
1576         struct fs_chunk search;
1577         u64 offset;
1578
1579         if (logical == BTRFS_SUPER_INFO_OFFSET)
1580                 return logical;
1581
1582         search.logical = logical;
1583         entry = tree_search(&mdres->chunk_tree, &search.n, chunk_cmp, 1);
1584         if (!entry) {
1585                 if (mdres->in != stdin)
1586                         printf("Couldn't find a chunk, using logical\n");
1587                 return logical;
1588         }
1589         fs_chunk = rb_entry(entry, struct fs_chunk, n);
1590         if (fs_chunk->logical > logical || fs_chunk->logical + fs_chunk->bytes < logical)
1591                 BUG();
1592         offset = search.logical - fs_chunk->logical;
1593
1594         *size = min(*size, fs_chunk->bytes + fs_chunk->logical - logical);
1595         return fs_chunk->physical + offset;
1596 }
1597
/*
 * Restore worker thread: repeatedly takes one cluster buffer off
 * mdres->list, decompresses it when the image uses zlib, patches super
 * block and chunk tree copies for a single-device restore, and writes
 * the result to the output file or device.
 *
 * The first error seen is latched into mdres->error under
 * mdres->mutex; the thread exits when mdres->done is set and the queue
 * is empty.
 */
static void *restore_worker(void *data)
{
        struct mdrestore_struct *mdres = (struct mdrestore_struct *)data;
        struct async_work *async;
        size_t size;
        u8 *buffer;
        u8 *outbuf;
        int outfd;
        int ret;
        /* worst-case decompressed size for one cluster */
        int compress_size = MAX_PENDING_SIZE * 4;

        outfd = fileno(mdres->out);
        buffer = malloc(compress_size);
        if (!buffer) {
                fprintf(stderr, "Error allocing buffer\n");
                pthread_mutex_lock(&mdres->mutex);
                if (!mdres->error)
                        mdres->error = -ENOMEM;
                pthread_mutex_unlock(&mdres->mutex);
                goto out;
        }

        while (1) {
                u64 bytenr;
                off_t offset = 0;
                int err = 0;

                /* wait for queued work (and a known leafsize) or shutdown */
                pthread_mutex_lock(&mdres->mutex);
                while (!mdres->leafsize || list_empty(&mdres->list)) {
                        if (mdres->done) {
                                pthread_mutex_unlock(&mdres->mutex);
                                goto out;
                        }
                        pthread_cond_wait(&mdres->cond, &mdres->mutex);
                }
                async = list_entry(mdres->list.next, struct async_work, list);
                list_del_init(&async->list);
                pthread_mutex_unlock(&mdres->mutex);

                if (mdres->compress_method == COMPRESS_ZLIB) {
                        size = compress_size;
                        ret = uncompress(buffer, (unsigned long *)&size,
                                         async->buffer, async->bufsize);
                        if (ret != Z_OK) {
                                fprintf(stderr, "Error decompressing %d\n",
                                        ret);
                                err = -EIO;
                        }
                        outbuf = buffer;
                } else {
                        outbuf = async->buffer;
                        size = async->bufsize;
                }

                /*
                 * On a single-device restore the super block and the
                 * chunk tree copies must be rewritten to describe one
                 * stripe per chunk on the target device.
                 */
                if (!mdres->multi_devices) {
                        if (async->start == BTRFS_SUPER_INFO_OFFSET) {
                                if (mdres->old_restore) {
                                        update_super_old(outbuf);
                                } else {
                                        ret = update_super(outbuf);
                                        if (ret)
                                                err = ret;
                                }
                        } else if (!mdres->old_restore) {
                                ret = fixup_chunk_tree_block(mdres, async, outbuf, size);
                                if (ret)
                                        err = ret;
                        }
                }

                if (!mdres->fixup_offset) {
                        /*
                         * Write the buffer out in chunk-sized pieces,
                         * translating logical addresses unless this is a
                         * multi-device dump written back verbatim.
                         */
                        while (size) {
                                u64 chunk_size = size;
                                if (!mdres->multi_devices)
                                        bytenr = logical_to_physical(mdres,
                                                                     async->start + offset,
                                                                     &chunk_size);
                                else
                                        bytenr = async->start + offset;

                                ret = pwrite64(outfd, outbuf+offset, chunk_size,
                                               bytenr);
                                if (ret != chunk_size) {
                                        if (ret < 0) {
                                                fprintf(stderr, "Error writing to "
                                                        "device %d\n", errno);
                                                err = errno;
                                                break;
                                        } else {
                                                fprintf(stderr, "Short write\n");
                                                err = -EIO;
                                                break;
                                        }
                                }
                                size -= chunk_size;
                                offset += chunk_size;
                        }
                } else if (async->start != BTRFS_SUPER_INFO_OFFSET) {
                        /*
                         * fixup_offset mode routes writes through the open
                         * filesystem's chunk mapping instead of raw offsets.
                         */
                        ret = write_data_to_disk(mdres->info, outbuf, async->start, size, 0);
                        if (ret) {
                                printk("Error write data\n");
                                exit(1);
                        }
                }


                /* backup super blocks are already there at fixup_offset stage */
                if (!mdres->multi_devices && async->start == BTRFS_SUPER_INFO_OFFSET)
                        write_backup_supers(outfd, outbuf);

                pthread_mutex_lock(&mdres->mutex);
                if (err && !mdres->error)
                        mdres->error = err;
                mdres->num_items--;
                pthread_mutex_unlock(&mdres->mutex);

                free(async->buffer);
                free(async);
        }
out:
        free(buffer);
        pthread_exit(NULL);
}
1721
1722 static void mdrestore_destroy(struct mdrestore_struct *mdres, int num_threads)
1723 {
1724         struct rb_node *n;
1725         int i;
1726
1727         while ((n = rb_first(&mdres->chunk_tree))) {
1728                 struct fs_chunk *entry;
1729
1730                 entry = rb_entry(n, struct fs_chunk, n);
1731                 rb_erase(n, &mdres->chunk_tree);
1732                 free(entry);
1733         }
1734         pthread_mutex_lock(&mdres->mutex);
1735         mdres->done = 1;
1736         pthread_cond_broadcast(&mdres->cond);
1737         pthread_mutex_unlock(&mdres->mutex);
1738
1739         for (i = 0; i < num_threads; i++)
1740                 pthread_join(mdres->threads[i], NULL);
1741
1742         pthread_cond_destroy(&mdres->cond);
1743         pthread_mutex_destroy(&mdres->mutex);
1744         free(mdres->threads);
1745 }
1746
1747 static int mdrestore_init(struct mdrestore_struct *mdres,
1748                           FILE *in, FILE *out, int old_restore,
1749                           int num_threads, int fixup_offset,
1750                           struct btrfs_fs_info *info, int multi_devices)
1751 {
1752         int i, ret = 0;
1753
1754         memset(mdres, 0, sizeof(*mdres));
1755         pthread_cond_init(&mdres->cond, NULL);
1756         pthread_mutex_init(&mdres->mutex, NULL);
1757         INIT_LIST_HEAD(&mdres->list);
1758         mdres->in = in;
1759         mdres->out = out;
1760         mdres->old_restore = old_restore;
1761         mdres->chunk_tree.rb_node = NULL;
1762         mdres->fixup_offset = fixup_offset;
1763         mdres->info = info;
1764         mdres->multi_devices = multi_devices;
1765
1766         if (!num_threads)
1767                 return 0;
1768
1769         mdres->num_threads = num_threads;
1770         mdres->threads = calloc(num_threads, sizeof(pthread_t));
1771         if (!mdres->threads)
1772                 return -ENOMEM;
1773         for (i = 0; i < num_threads; i++) {
1774                 ret = pthread_create(mdres->threads + i, NULL, restore_worker,
1775                                      mdres);
1776                 if (ret)
1777                         break;
1778         }
1779         if (ret)
1780                 mdrestore_destroy(mdres, i + 1);
1781         return ret;
1782 }
1783
/*
 * Cache fs-wide metadata (leafsize, fsid, device uuid and devid) from
 * the super block payload carried by @async into @mdres.  A no-op once
 * mdres->leafsize has already been populated.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -EIO when the
 * compressed payload cannot be inflated.
 */
static int fill_mdres_info(struct mdrestore_struct *mdres,
			   struct async_work *async)
{
	struct btrfs_super_block *super;
	u8 *scratch = NULL;
	u8 *data;
	int ret;

	/* We've already been initialized */
	if (mdres->leafsize)
		return 0;

	if (mdres->compress_method == COMPRESS_ZLIB) {
		size_t len = MAX_PENDING_SIZE * 2;

		scratch = malloc(MAX_PENDING_SIZE * 2);
		if (!scratch)
			return -ENOMEM;
		ret = uncompress(scratch, (unsigned long *)&len,
				 async->buffer, async->bufsize);
		if (ret != Z_OK) {
			fprintf(stderr, "Error decompressing %d\n", ret);
			free(scratch);
			return -EIO;
		}
		data = scratch;
	} else {
		data = async->buffer;
	}

	super = (struct btrfs_super_block *)data;
	mdres->leafsize = btrfs_super_leafsize(super);
	memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
	memcpy(mdres->uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
	mdres->devid = le64_to_cpu(super->dev_item.devid);
	/* free(NULL) is a no-op in the uncompressed case. */
	free(scratch);
	return 0;
}
1823
1824 static int add_cluster(struct meta_cluster *cluster,
1825                        struct mdrestore_struct *mdres, u64 *next)
1826 {
1827         struct meta_cluster_item *item;
1828         struct meta_cluster_header *header = &cluster->header;
1829         struct async_work *async;
1830         u64 bytenr;
1831         u32 i, nritems;
1832         int ret;
1833
1834         BUG_ON(mdres->num_items);
1835         mdres->compress_method = header->compress;
1836
1837         bytenr = le64_to_cpu(header->bytenr) + BLOCK_SIZE;
1838         nritems = le32_to_cpu(header->nritems);
1839         for (i = 0; i < nritems; i++) {
1840                 item = &cluster->items[i];
1841                 async = calloc(1, sizeof(*async));
1842                 if (!async) {
1843                         fprintf(stderr, "Error allocating async\n");
1844                         return -ENOMEM;
1845                 }
1846                 async->start = le64_to_cpu(item->bytenr);
1847                 async->bufsize = le32_to_cpu(item->size);
1848                 async->buffer = malloc(async->bufsize);
1849                 if (!async->buffer) {
1850                         fprintf(stderr, "Error allocing async buffer\n");
1851                         free(async);
1852                         return -ENOMEM;
1853                 }
1854                 ret = fread(async->buffer, async->bufsize, 1, mdres->in);
1855                 if (ret != 1) {
1856                         fprintf(stderr, "Error reading buffer %d\n", errno);
1857                         free(async->buffer);
1858                         free(async);
1859                         return -EIO;
1860                 }
1861                 bytenr += async->bufsize;
1862
1863                 pthread_mutex_lock(&mdres->mutex);
1864                 if (async->start == BTRFS_SUPER_INFO_OFFSET) {
1865                         ret = fill_mdres_info(mdres, async);
1866                         if (ret) {
1867                                 fprintf(stderr, "Error setting up restore\n");
1868                                 pthread_mutex_unlock(&mdres->mutex);
1869                                 free(async->buffer);
1870                                 free(async);
1871                                 return ret;
1872                         }
1873                 }
1874                 list_add_tail(&async->list, &mdres->list);
1875                 mdres->num_items++;
1876                 pthread_cond_signal(&mdres->cond);
1877                 pthread_mutex_unlock(&mdres->mutex);
1878         }
1879         if (bytenr & BLOCK_MASK) {
1880                 char buffer[BLOCK_MASK];
1881                 size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);
1882
1883                 bytenr += size;
1884                 ret = fread(buffer, size, 1, mdres->in);
1885                 if (ret != 1) {
1886                         fprintf(stderr, "Error reading in buffer %d\n", errno);
1887                         return -EIO;
1888                 }
1889         }
1890         *next = bytenr;
1891         return 0;
1892 }
1893
1894 static int wait_for_worker(struct mdrestore_struct *mdres)
1895 {
1896         int ret = 0;
1897
1898         pthread_mutex_lock(&mdres->mutex);
1899         ret = mdres->error;
1900         while (!ret && mdres->num_items > 0) {
1901                 struct timespec ts = {
1902                         .tv_sec = 0,
1903                         .tv_nsec = 10000000,
1904                 };
1905                 pthread_mutex_unlock(&mdres->mutex);
1906                 nanosleep(&ts, NULL);
1907                 pthread_mutex_lock(&mdres->mutex);
1908                 ret = mdres->error;
1909         }
1910         pthread_mutex_unlock(&mdres->mutex);
1911         return ret;
1912 }
1913
1914 static int read_chunk_block(struct mdrestore_struct *mdres, u8 *buffer,
1915                             u64 bytenr, u64 item_bytenr, u32 bufsize,
1916                             u64 cluster_bytenr)
1917 {
1918         struct extent_buffer *eb;
1919         int ret = 0;
1920         int i;
1921
1922         eb = alloc_dummy_eb(bytenr, mdres->leafsize);
1923         if (!eb) {
1924                 ret = -ENOMEM;
1925                 goto out;
1926         }
1927
1928         while (item_bytenr != bytenr) {
1929                 buffer += mdres->leafsize;
1930                 item_bytenr += mdres->leafsize;
1931         }
1932
1933         memcpy(eb->data, buffer, mdres->leafsize);
1934         if (btrfs_header_bytenr(eb) != bytenr) {
1935                 fprintf(stderr, "Eb bytenr doesn't match found bytenr\n");
1936                 ret = -EIO;
1937                 goto out;
1938         }
1939
1940         if (memcmp(mdres->fsid, eb->data + offsetof(struct btrfs_header, fsid),
1941                    BTRFS_FSID_SIZE)) {
1942                 fprintf(stderr, "Fsid doesn't match\n");
1943                 ret = -EIO;
1944                 goto out;
1945         }
1946
1947         if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID) {
1948                 fprintf(stderr, "Does not belong to the chunk tree\n");
1949                 ret = -EIO;
1950                 goto out;
1951         }
1952
1953         for (i = 0; i < btrfs_header_nritems(eb); i++) {
1954                 struct btrfs_chunk chunk;
1955                 struct fs_chunk *fs_chunk;
1956                 struct btrfs_key key;
1957
1958                 if (btrfs_header_level(eb)) {
1959                         u64 blockptr = btrfs_node_blockptr(eb, i);
1960
1961                         ret = search_for_chunk_blocks(mdres, blockptr,
1962                                                       cluster_bytenr);
1963                         if (ret)
1964                                 break;
1965                         continue;
1966                 }
1967
1968                 /* Yay a leaf!  We loves leafs! */
1969                 btrfs_item_key_to_cpu(eb, &key, i);
1970                 if (key.type != BTRFS_CHUNK_ITEM_KEY)
1971                         continue;
1972
1973                 fs_chunk = malloc(sizeof(struct fs_chunk));
1974                 if (!fs_chunk) {
1975                         fprintf(stderr, "Erorr allocating chunk\n");
1976                         ret = -ENOMEM;
1977                         break;
1978                 }
1979                 memset(fs_chunk, 0, sizeof(*fs_chunk));
1980                 read_extent_buffer(eb, &chunk, btrfs_item_ptr_offset(eb, i),
1981                                    sizeof(chunk));
1982
1983                 fs_chunk->logical = key.offset;
1984                 fs_chunk->physical = btrfs_stack_stripe_offset(&chunk.stripe);
1985                 fs_chunk->bytes = btrfs_stack_chunk_length(&chunk);
1986                 tree_insert(&mdres->chunk_tree, &fs_chunk->n, chunk_cmp);
1987         }
1988 out:
1989         free(eb);
1990         return ret;
1991 }
1992
/* If you have to ask you aren't worthy */
/*
 * Scan the metadump image for the cluster item containing the chunk
 * tree block at logical address @search, starting from @cluster_bytenr
 * (retrying once from the start of the image on EOF), decode it and
 * hand it to read_chunk_block().
 *
 * Returns 0 on success, -ENOMEM or -EIO on failure.
 */
static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
				   u64 search, u64 cluster_bytenr)
{
	struct meta_cluster *cluster;
	struct meta_cluster_header *header;
	struct meta_cluster_item *item;
	u64 current_cluster = cluster_bytenr, bytenr;
	u64 item_bytenr;
	u32 bufsize, nritems, i;
	u32 max_size = MAX_PENDING_SIZE * 2;
	u8 *buffer, *tmp = NULL;
	int ret = 0;

	cluster = malloc(BLOCK_SIZE);
	if (!cluster) {
		fprintf(stderr, "Error allocating cluster\n");
		return -ENOMEM;
	}

	buffer = malloc(max_size);
	if (!buffer) {
		fprintf(stderr, "Error allocing buffer\n");
		free(cluster);
		return -ENOMEM;
	}

	/* Compressed images additionally need a staging buffer. */
	if (mdres->compress_method == COMPRESS_ZLIB) {
		tmp = malloc(max_size);
		if (!tmp) {
			fprintf(stderr, "Error allocing tmp buffer\n");
			free(cluster);
			free(buffer);
			return -ENOMEM;
		}
	}

	bytenr = current_cluster;
	while (1) {
		if (fseek(mdres->in, current_cluster, SEEK_SET)) {
			fprintf(stderr, "Error seeking: %d\n", errno);
			ret = -EIO;
			break;
		}

		/*
		 * fread() returns the number of complete items (0 or 1
		 * here); 0 covers both EOF and read errors, so the old
		 * "ret < 0" branch was unreachable and has been dropped.
		 */
		ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
		if (ret == 0) {
			if (cluster_bytenr != 0) {
				/* Wrap around and retry from the start. */
				cluster_bytenr = 0;
				current_cluster = 0;
				bytenr = 0;
				continue;
			}
			/* replaces leftover debug text "ok this is where
			 * we screwed up?" */
			fprintf(stderr,
				"Error: hit end of image without finding chunk block %llu\n",
				(unsigned long long)search);
			ret = -EIO;
			break;
		}
		ret = 0;

		header = &cluster->header;
		if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
		    le64_to_cpu(header->bytenr) != current_cluster) {
			fprintf(stderr, "bad header in metadump image\n");
			ret = -EIO;
			break;
		}

		bytenr += BLOCK_SIZE;
		nritems = le32_to_cpu(header->nritems);
		for (i = 0; i < nritems; i++) {
			size_t size;

			item = &cluster->items[i];
			bufsize = le32_to_cpu(item->size);
			item_bytenr = le64_to_cpu(item->bytenr);

			if (bufsize > max_size) {
				fprintf(stderr, "item %u size %u too big\n",
					i, bufsize);
				ret = -EIO;
				break;
			}

			if (mdres->compress_method == COMPRESS_ZLIB) {
				ret = fread(tmp, bufsize, 1, mdres->in);
				if (ret != 1) {
					fprintf(stderr, "Error reading: %d\n",
						errno);
					ret = -EIO;
					break;
				}

				size = max_size;
				ret = uncompress(buffer,
						 (unsigned long *)&size, tmp,
						 bufsize);
				if (ret != Z_OK) {
					fprintf(stderr, "Error decompressing "
						"%d\n", ret);
					ret = -EIO;
					break;
				}
			} else {
				ret = fread(buffer, bufsize, 1, mdres->in);
				if (ret != 1) {
					fprintf(stderr, "Error reading: %d\n",
						errno);
					ret = -EIO;
					break;
				}
				size = bufsize;
			}
			ret = 0;

			/* Does this item's logical range cover @search? */
			if (item_bytenr <= search &&
			    item_bytenr + size > search) {
				ret = read_chunk_block(mdres, buffer, search,
						       item_bytenr, size,
						       current_cluster);
				if (!ret)
					ret = 1;	/* found: stop scanning */
				break;
			}
			bytenr += bufsize;
		}
		if (ret) {
			if (ret > 0)
				ret = 0;	/* "found" is success */
			break;
		}
		/* Advance past the cluster's padding to the next one. */
		if (bytenr & BLOCK_MASK)
			bytenr += BLOCK_SIZE - (bytenr & BLOCK_MASK);
		current_cluster = bytenr;
	}

	free(tmp);
	free(buffer);
	free(cluster);
	return ret;
}
2136
/*
 * Locate the super block item in the first cluster of the image, decode
 * it to learn the chunk root location and fs identity, then walk the
 * chunk tree via search_for_chunk_blocks() to populate the logical->
 * physical mapping.  Skipped entirely (returns 0) when reading from
 * stdin, which cannot seek.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int build_chunk_tree(struct mdrestore_struct *mdres,
			    struct meta_cluster *cluster)
{
	struct btrfs_super_block *super;
	struct meta_cluster_header *header;
	struct meta_cluster_item *item = NULL;
	u64 chunk_root_bytenr = 0;
	u32 i, nritems;
	u64 bytenr = 0;
	u8 *buffer;
	int ret;

	/* We can't seek with stdin so don't bother doing this */
	if (mdres->in == stdin)
		return 0;

	ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
	if (ret <= 0) {
		fprintf(stderr, "Error reading in cluster: %d\n", errno);
		return -EIO;
	}
	ret = 0;

	header = &cluster->header;
	if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
	    le64_to_cpu(header->bytenr) != 0) {
		fprintf(stderr, "bad header in metadump image\n");
		return -EIO;
	}

	bytenr += BLOCK_SIZE;
	mdres->compress_method = header->compress;
	nritems = le32_to_cpu(header->nritems);

	/* Seek past every item until we reach the super block's entry. */
	for (i = 0; i < nritems; i++) {
		item = &cluster->items[i];

		if (le64_to_cpu(item->bytenr) == BTRFS_SUPER_INFO_OFFSET)
			break;
		bytenr += le32_to_cpu(item->size);
		if (fseek(mdres->in, le32_to_cpu(item->size), SEEK_CUR)) {
			fprintf(stderr, "Error seeking: %d\n", errno);
			return -EIO;
		}
	}

	if (!item || le64_to_cpu(item->bytenr) != BTRFS_SUPER_INFO_OFFSET) {
		fprintf(stderr, "Huh, didn't find the super?\n");
		return -EINVAL;
	}

	buffer = malloc(le32_to_cpu(item->size));
	if (!buffer) {
		fprintf(stderr, "Error allocing buffer\n");
		return -ENOMEM;
	}

	ret = fread(buffer, le32_to_cpu(item->size), 1, mdres->in);
	if (ret != 1) {
		fprintf(stderr, "Error reading buffer: %d\n", errno);
		free(buffer);
		return -EIO;
	}

	/* Inflate the super block payload if the image is compressed. */
	if (mdres->compress_method == COMPRESS_ZLIB) {
		size_t inflated = MAX_PENDING_SIZE * 2;
		u8 *scratch;

		scratch = malloc(MAX_PENDING_SIZE * 2);
		if (!scratch) {
			free(buffer);
			return -ENOMEM;
		}
		ret = uncompress(scratch, (unsigned long *)&inflated,
				 buffer, le32_to_cpu(item->size));
		if (ret != Z_OK) {
			fprintf(stderr, "Error decompressing %d\n", ret);
			free(buffer);
			free(scratch);
			return -EIO;
		}
		free(buffer);
		buffer = scratch;
	}

	pthread_mutex_lock(&mdres->mutex);
	super = (struct btrfs_super_block *)buffer;
	chunk_root_bytenr = btrfs_super_chunk_root(super);
	mdres->leafsize = btrfs_super_leafsize(super);
	memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
	memcpy(mdres->uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
	mdres->devid = le64_to_cpu(super->dev_item.devid);
	free(buffer);
	pthread_mutex_unlock(&mdres->mutex);

	return search_for_chunk_blocks(mdres, chunk_root_bytenr, 0);
}
2234
2235 static int __restore_metadump(const char *input, FILE *out, int old_restore,
2236                               int num_threads, int fixup_offset,
2237                               const char *target, int multi_devices)
2238 {
2239         struct meta_cluster *cluster = NULL;
2240         struct meta_cluster_header *header;
2241         struct mdrestore_struct mdrestore;
2242         struct btrfs_fs_info *info = NULL;
2243         u64 bytenr = 0;
2244         FILE *in = NULL;
2245         int ret = 0;
2246
2247         if (!strcmp(input, "-")) {
2248                 in = stdin;
2249         } else {
2250                 in = fopen(input, "r");
2251                 if (!in) {
2252                         perror("unable to open metadump image");
2253                         return 1;
2254                 }
2255         }
2256
2257         /* NOTE: open with write mode */
2258         if (fixup_offset) {
2259                 BUG_ON(!target);
2260                 info = open_ctree_fs_info(target, 0, 0,
2261                                           OPEN_CTREE_WRITES |
2262                                           OPEN_CTREE_RESTORE |
2263                                           OPEN_CTREE_PARTIAL);
2264                 if (!info) {
2265                         fprintf(stderr, "%s: open ctree failed\n", __func__);
2266                         ret = -EIO;
2267                         goto failed_open;
2268                 }
2269         }
2270
2271         cluster = malloc(BLOCK_SIZE);
2272         if (!cluster) {
2273                 fprintf(stderr, "Error allocating cluster\n");
2274                 ret = -ENOMEM;
2275                 goto failed_info;
2276         }
2277
2278         ret = mdrestore_init(&mdrestore, in, out, old_restore, num_threads,
2279                              fixup_offset, info, multi_devices);
2280         if (ret) {
2281                 fprintf(stderr, "Error initing mdrestore %d\n", ret);
2282                 goto failed_cluster;
2283         }
2284
2285         if (!multi_devices) {
2286                 ret = build_chunk_tree(&mdrestore, cluster);
2287                 if (ret)
2288                         goto out;
2289         }
2290
2291         if (in != stdin && fseek(in, 0, SEEK_SET)) {
2292                 fprintf(stderr, "Error seeking %d\n", errno);
2293                 goto out;
2294         }
2295
2296         while (1) {
2297                 ret = fread(cluster, BLOCK_SIZE, 1, in);
2298                 if (!ret)
2299                         break;
2300
2301                 header = &cluster->header;
2302                 if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
2303                     le64_to_cpu(header->bytenr) != bytenr) {
2304                         fprintf(stderr, "bad header in metadump image\n");
2305                         ret = -EIO;
2306                         break;
2307                 }
2308                 ret = add_cluster(cluster, &mdrestore, &bytenr);
2309                 if (ret) {
2310                         fprintf(stderr, "Error adding cluster\n");
2311                         break;
2312                 }
2313
2314                 ret = wait_for_worker(&mdrestore);
2315                 if (ret) {
2316                         fprintf(stderr, "One of the threads errored out %d\n",
2317                                 ret);
2318                         break;
2319                 }
2320         }
2321 out:
2322         mdrestore_destroy(&mdrestore, num_threads);
2323 failed_cluster:
2324         free(cluster);
2325 failed_info:
2326         if (fixup_offset && info)
2327                 close_ctree(info->chunk_root);
2328 failed_open:
2329         if (in != stdin)
2330                 fclose(in);
2331         return ret;
2332 }
2333
/*
 * Restore a metadump image without offset fixups: thin wrapper around
 * __restore_metadump() with fixup_offset disabled and no target device.
 */
static int restore_metadump(const char *input, FILE *out, int old_restore,
			    int num_threads, int multi_devices)
{
	return __restore_metadump(input, out, old_restore, num_threads,
				  0 /* fixup_offset */, NULL /* target */,
				  multi_devices);
}
2340
/*
 * Restore a multi-device metadump with in-place offset fixups on
 * @target: thin wrapper around __restore_metadump() with fixup_offset
 * and multi_devices both enabled.
 */
static int fixup_metadump(const char *input, FILE *out, int num_threads,
			  const char *target)
{
	return __restore_metadump(input, out, 0 /* old_restore */,
				  num_threads, 1 /* fixup_offset */,
				  target, 1 /* multi_devices */);
}
2346
2347 static int update_disk_super_on_device(struct btrfs_fs_info *info,
2348                                        const char *other_dev, u64 cur_devid)
2349 {
2350         struct btrfs_key key;
2351         struct extent_buffer *leaf;
2352         struct btrfs_path path;
2353         struct btrfs_dev_item *dev_item;
2354         struct btrfs_super_block *disk_super;
2355         char dev_uuid[BTRFS_UUID_SIZE];
2356         char fs_uuid[BTRFS_UUID_SIZE];
2357         u64 devid, type, io_align, io_width;
2358         u64 sector_size, total_bytes, bytes_used;
2359         char *buf;
2360         int fp;
2361         int ret;
2362
2363         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2364         key.type = BTRFS_DEV_ITEM_KEY;
2365         key.offset = cur_devid;
2366
2367         btrfs_init_path(&path);
2368         ret = btrfs_search_slot(NULL, info->chunk_root, &key, &path, 0, 0); 
2369         if (ret) {
2370                 fprintf(stderr, "search key fails\n");
2371                 exit(1);
2372         }
2373
2374         leaf = path.nodes[0];
2375         dev_item = btrfs_item_ptr(leaf, path.slots[0],
2376                                   struct btrfs_dev_item);
2377
2378         devid = btrfs_device_id(leaf, dev_item);
2379         if (devid != cur_devid) {
2380                 printk("devid %llu mismatch with %llu\n", devid, cur_devid);
2381                 exit(1);
2382         }
2383
2384         type = btrfs_device_type(leaf, dev_item);
2385         io_align = btrfs_device_io_align(leaf, dev_item);
2386         io_width = btrfs_device_io_width(leaf, dev_item);
2387         sector_size = btrfs_device_sector_size(leaf, dev_item);
2388         total_bytes = btrfs_device_total_bytes(leaf, dev_item);
2389         bytes_used = btrfs_device_bytes_used(leaf, dev_item);
2390         read_extent_buffer(leaf, dev_uuid, (unsigned long)btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE);
2391         read_extent_buffer(leaf, fs_uuid, (unsigned long)btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE);
2392
2393         btrfs_release_path(&path);
2394
2395         printk("update disk super on %s devid=%llu\n", other_dev, devid);
2396
2397         /* update other devices' super block */
2398         fp = open(other_dev, O_CREAT | O_RDWR, 0600);
2399         if (fp < 0) {
2400                 fprintf(stderr, "could not open %s\n", other_dev);
2401                 exit(1);
2402         }
2403
2404         buf = malloc(BTRFS_SUPER_INFO_SIZE);
2405         if (!buf) {
2406                 ret = -ENOMEM;
2407                 exit(1);
2408         }
2409
2410         memcpy(buf, info->super_copy, BTRFS_SUPER_INFO_SIZE);
2411
2412         disk_super = (struct btrfs_super_block *)buf;
2413         dev_item = &disk_super->dev_item;
2414
2415         btrfs_set_stack_device_type(dev_item, type);
2416         btrfs_set_stack_device_id(dev_item, devid);
2417         btrfs_set_stack_device_total_bytes(dev_item, total_bytes);
2418         btrfs_set_stack_device_bytes_used(dev_item, bytes_used);
2419         btrfs_set_stack_device_io_align(dev_item, io_align);
2420         btrfs_set_stack_device_io_width(dev_item, io_width);
2421         btrfs_set_stack_device_sector_size(dev_item, sector_size);
2422         memcpy(dev_item->uuid, dev_uuid, BTRFS_UUID_SIZE);
2423         memcpy(dev_item->fsid, fs_uuid, BTRFS_UUID_SIZE);
2424         csum_block((u8 *)buf, BTRFS_SUPER_INFO_SIZE);
2425
2426         ret = pwrite64(fp, buf, BTRFS_SUPER_INFO_SIZE, BTRFS_SUPER_INFO_OFFSET);
2427         if (ret != BTRFS_SUPER_INFO_SIZE) {
2428                 ret = -EIO;
2429                 goto out;
2430         }
2431
2432         write_backup_supers(fp, (u8 *)buf);
2433
2434 out:
2435         free(buf);
2436         close(fp);
2437         return 0;
2438 }
2439
/*
 * Print the command-line usage summary to stderr and terminate with
 * exit status 1.  Never returns.
 */
static void print_usage(void)
{
	static const char * const usage_lines[] = {
		"usage: btrfs-image [options] source target",
		"\t-r      \trestore metadump image",
		"\t-c value\tcompression level (0 ~ 9)",
		"\t-t value\tnumber of threads (1 ~ 32)",
		"\t-o      \tdon't mess with the chunk tree when restoring",
		"\t-s      \tsanitize file names, use once to just use garbage, use twice if you want crc collisions",
		"\t-w      \twalk all trees instead of using extent tree, do this if your extent tree is broken",
		"\t-m      \trestore for multiple devices",
	};
	size_t i;

	for (i = 0; i < sizeof(usage_lines) / sizeof(usage_lines[0]); i++)
		fprintf(stderr, "%s\n", usage_lines[i]);
	exit(1);
}
2452
2453 int main(int argc, char *argv[])
2454 {
2455         char *source;
2456         char *target;
2457         u64 num_threads = 0;
2458         u64 compress_level = 0;
2459         int create = 1;
2460         int old_restore = 0;
2461         int walk_trees = 0;
2462         int multi_devices = 0;
2463         int ret;
2464         int sanitize = 0;
2465         int dev_cnt = 0;
2466         int usage_error = 0;
2467         FILE *out;
2468
2469         while (1) {
2470                 int c = getopt(argc, argv, "rc:t:oswm");
2471                 if (c < 0)
2472                         break;
2473                 switch (c) {
2474                 case 'r':
2475                         create = 0;
2476                         break;
2477                 case 't':
2478                         num_threads = arg_strtou64(optarg);
2479                         if (num_threads > 32)
2480                                 print_usage();
2481                         break;
2482                 case 'c':
2483                         compress_level = arg_strtou64(optarg);
2484                         if (compress_level > 9)
2485                                 print_usage();
2486                         break;
2487                 case 'o':
2488                         old_restore = 1;
2489                         break;
2490                 case 's':
2491                         sanitize++;
2492                         break;
2493                 case 'w':
2494                         walk_trees = 1;
2495                         break;
2496                 case 'm':
2497                         create = 0;
2498                         multi_devices = 1;
2499                         break;
2500                 default:
2501                         print_usage();
2502                 }
2503         }
2504
2505         argc = argc - optind;
2506         dev_cnt = argc - 1;
2507
2508         if (create) {
2509                 if (old_restore) {
2510                         fprintf(stderr, "Usage error: create and restore cannot be used at the same time\n");
2511                         usage_error++;
2512                 }
2513         } else {
2514                 if (walk_trees || sanitize || compress_level) {
2515                         fprintf(stderr, "Usage error: use -w, -s, -c options for restore makes no sense\n");
2516                         usage_error++;
2517                 }
2518                 if (multi_devices && dev_cnt < 2) {
2519                         fprintf(stderr, "Usage error: not enough devices specified for -m option\n");
2520                         usage_error++;
2521                 }
2522                 if (!multi_devices && dev_cnt != 1) {
2523                         fprintf(stderr, "Usage error: accepts only 1 device without -m option\n");
2524                         usage_error++;
2525                 }
2526         }
2527
2528         if (usage_error)
2529                 print_usage();
2530
2531         source = argv[optind];
2532         target = argv[optind + 1];
2533
2534         if (create && !strcmp(target, "-")) {
2535                 out = stdout;
2536         } else {
2537                 out = fopen(target, "w+");
2538                 if (!out) {
2539                         perror("unable to create target file");
2540                         exit(1);
2541                 }
2542         }
2543
2544         if (num_threads == 0 && compress_level > 0) {
2545                 num_threads = sysconf(_SC_NPROCESSORS_ONLN);
2546                 if (num_threads <= 0)
2547                         num_threads = 1;
2548         }
2549
2550         if (create)
2551                 ret = create_metadump(source, out, num_threads,
2552                                       compress_level, sanitize, walk_trees);
2553         else
2554                 ret = restore_metadump(source, out, old_restore, 1,
2555                                        multi_devices);
2556         if (ret) {
2557                 printk("%s failed (%s)\n", (create) ? "create" : "restore",
2558                        strerror(errno));
2559                 goto out;
2560         }
2561
2562          /* extended support for multiple devices */
2563         if (!create && multi_devices) {
2564                 struct btrfs_fs_info *info;
2565                 u64 total_devs;
2566                 int i;
2567
2568                 info = open_ctree_fs_info(target, 0, 0,
2569                                           OPEN_CTREE_PARTIAL |
2570                                           OPEN_CTREE_RESTORE);
2571                 if (!info) {
2572                         int e = errno;
2573                         fprintf(stderr, "unable to open %s error = %s\n",
2574                                 target, strerror(e));
2575                         return 1;
2576                 }
2577
2578                 total_devs = btrfs_super_num_devices(info->super_copy);
2579                 if (total_devs != dev_cnt) {
2580                         printk("it needs %llu devices but has only %d\n",
2581                                 total_devs, dev_cnt);
2582                         close_ctree(info->chunk_root);
2583                         goto out;
2584                 }
2585
2586                 /* update super block on other disks */
2587                 for (i = 2; i <= dev_cnt; i++) {
2588                         ret = update_disk_super_on_device(info,
2589                                         argv[optind + i], (u64)i);
2590                         if (ret) {
2591                                 printk("update disk super failed devid=%d (error=%d)\n",
2592                                         i, ret);
2593                                 close_ctree(info->chunk_root);
2594                                 exit(1);
2595                         }
2596                 }
2597
2598                 close_ctree(info->chunk_root);
2599
2600                 /* fix metadata block to map correct chunk */
2601                 ret = fixup_metadump(source, out, 1, target);
2602                 if (ret) {
2603                         fprintf(stderr, "fix metadump failed (error=%d)\n",
2604                                 ret);
2605                         exit(1);
2606                 }
2607         }
2608
2609 out:
2610         if (out == stdout) {
2611                 fflush(out);
2612         } else {
2613                 fclose(out);
2614                 if (ret && create) {
2615                         int unlink_ret;
2616
2617                         unlink_ret = unlink(target);
2618                         if (unlink_ret)
2619                                 fprintf(stderr,
2620                                         "unlink output file failed : %s\n",
2621                                         strerror(errno));
2622                 }
2623         }
2624
2625         return !!ret;
2626 }