btrfs-progs: fix wrong max system array size check in user space
[platform/upstream/btrfs-progs.git] / btrfs-image.c
1 /*
2  * Copyright (C) 2008 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #define _XOPEN_SOURCE 500
20 #define _GNU_SOURCE 1
21 #include <pthread.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <sys/types.h>
25 #include <sys/stat.h>
26 #include <fcntl.h>
27 #include <unistd.h>
28 #include <dirent.h>
29 #include <zlib.h>
30 #include "kerncompat.h"
31 #include "crc32c.h"
32 #include "ctree.h"
33 #include "disk-io.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "version.h"
37 #include "volumes.h"
38 #include "extent_io.h"
39
/* Magic value stored in the first field of every cluster header. */
#define HEADER_MAGIC            0xbd5c25e27295668bULL
/* Maximum number of bytes coalesced before flush_pending() packages them. */
#define MAX_PENDING_SIZE        (256 * 1024)
/* The image stream is laid out in fixed-size blocks; clusters and the
 * trailing padding are aligned to this size. */
#define BLOCK_SIZE              1024
#define BLOCK_MASK              (BLOCK_SIZE - 1)

/* Values of meta_cluster_header.compress. */
#define COMPRESS_NONE           0
#define COMPRESS_ZLIB           1
/* Index entry in a cluster: one contiguous extent stored in the image. */
struct meta_cluster_item {
        __le64 bytenr;          /* logical start of the extent */
        __le32 size;            /* stored (possibly compressed) byte count */
} __attribute__ ((__packed__));

/* Header at the front of every cluster block in the image stream. */
struct meta_cluster_header {
        __le64 magic;           /* HEADER_MAGIC */
        __le64 bytenr;          /* stream offset of this cluster */
        __le32 nritems;         /* number of items[] entries that follow */
        u8 compress;            /* COMPRESS_NONE or COMPRESS_ZLIB */
} __attribute__ ((__packed__));

/* cluster header + index items + buffers */
struct meta_cluster {
        struct meta_cluster_header header;
        struct meta_cluster_item items[];
} __attribute__ ((__packed__));

/* How many index items fit in one BLOCK_SIZE cluster block. */
#define ITEMS_PER_CLUSTER ((BLOCK_SIZE - sizeof(struct meta_cluster)) / \
                           sizeof(struct meta_cluster_item))
68
/*
 * Maps a logical chunk address to its physical location on restore; kept in
 * an rb tree keyed by ->logical (see chunk_cmp, which also supports a fuzzy
 * in-range lookup).
 */
struct fs_chunk {
        u64 logical;
        u64 physical;
        u64 bytes;
        struct rb_node n;
};

/* One unit of work handed to the compression worker threads. */
struct async_work {
        struct list_head list;          /* membership in md->list (queue) */
        struct list_head ordered;       /* membership in md->ordered (write order) */
        u64 start;                      /* logical start of the extent */
        u64 size;                       /* uncompressed byte count */
        u8 *buffer;                     /* data; swapped for compressed copy */
        size_t bufsize;                 /* current size of ->buffer */
        int error;                      /* set nonzero on compression failure */
};
85
/* State for creating a metadump image. */
struct metadump_struct {
        struct btrfs_root *root;
        FILE *out;

        struct meta_cluster *cluster;   /* cluster currently being filled */

        pthread_t *threads;             /* compression worker pool */
        size_t num_threads;
        pthread_mutex_t mutex;          /* guards lists, counters and done */
        pthread_cond_t cond;            /* signals workers about new items */
        struct rb_root name_tree;       /* cache of sanitized-name mappings */

        struct list_head list;          /* async_work awaiting compression */
        struct list_head ordered;       /* async_work in write-out order */
        size_t num_items;               /* items queued in current cluster */
        size_t num_ready;               /* items finished by the workers */

        u64 pending_start;              /* logical start of coalesced extent */
        u64 pending_size;               /* bytes coalesced so far */

        int compress_level;             /* zlib level; 0 disables compression */
        int done;                       /* tells the workers to exit */
        int data;                       /* nonzero if pending extent is data */
        int sanitize_names;             /* 1 = random garbage, >1 = crc32c collision */
};

/* A sanitized filename: ->val is the original, ->sub its replacement. */
struct name {
        struct rb_node n;
        char *val;
        char *sub;
        u32 len;
};
118
/* State for restoring a filesystem from a metadump image. */
struct mdrestore_struct {
        FILE *in;
        FILE *out;

        pthread_t *threads;             /* worker pool (restore side) */
        size_t num_threads;
        pthread_mutex_t mutex;          /* guards list, counters and flags */
        pthread_cond_t cond;

        struct rb_root chunk_tree;      /* fs_chunk logical->physical map */
        struct list_head list;          /* work queued for the workers */
        size_t num_items;
        u64 leafsize;
        u64 devid;
        u8 uuid[BTRFS_UUID_SIZE];
        u8 fsid[BTRFS_FSID_SIZE];

        int compress_method;            /* COMPRESS_NONE or COMPRESS_ZLIB */
        int done;
        int error;
        int old_restore;                /* restoring an old-format image */
        int fixup_offset;
        int multi_devices;
        struct btrfs_fs_info *info;
};

/* Forward declarations for helpers defined later in this file. */
static void print_usage(void) __attribute__((noreturn));
static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
                                   u64 search, u64 cluster_bytenr);
static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size);
149
150 static void csum_block(u8 *buf, size_t len)
151 {
152         char result[BTRFS_CRC32_SIZE];
153         u32 crc = ~(u32)0;
154         crc = crc32c(crc, buf + BTRFS_CSUM_SIZE, len - BTRFS_CSUM_SIZE);
155         btrfs_csum_final(crc, result);
156         memcpy(buf, result, BTRFS_CRC32_SIZE);
157 }
158
159 static int has_name(struct btrfs_key *key)
160 {
161         switch (key->type) {
162         case BTRFS_DIR_ITEM_KEY:
163         case BTRFS_DIR_INDEX_KEY:
164         case BTRFS_INODE_REF_KEY:
165         case BTRFS_INODE_EXTREF_KEY:
166         case BTRFS_XATTR_ITEM_KEY:
167                 return 1;
168         default:
169                 break;
170         }
171
172         return 0;
173 }
174
/*
 * Return a freshly malloc'ed buffer of @name_len random printable ASCII
 * bytes (33..126), never containing '/'.  The buffer is NOT NUL-terminated;
 * callers track the length themselves.  Returns NULL on allocation failure.
 * Caller owns (and frees) the result.
 */
static char *generate_garbage(u32 name_len)
{
        char *out = malloc(name_len);
        u32 idx;

        if (!out)
                return NULL;

        for (idx = 0; idx < name_len; idx++) {
                char ch = 33 + rand() % 94;

                /* bump '/' to '0' so the result is never a path separator */
                out[idx] = (ch == '/') ? ch + 1 : ch;
        }

        return out;
}
193
194 static int name_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
195 {
196         struct name *entry = rb_entry(a, struct name, n);
197         struct name *ins = rb_entry(b, struct name, n);
198         u32 len;
199
200         len = min(ins->len, entry->len);
201         return memcmp(ins->val, entry->val, len);
202 }
203
204 static int chunk_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
205 {
206         struct fs_chunk *entry = rb_entry(a, struct fs_chunk, n);
207         struct fs_chunk *ins = rb_entry(b, struct fs_chunk, n);
208
209         if (fuzz && ins->logical >= entry->logical &&
210             ins->logical < entry->logical + entry->bytes)
211                 return 0;
212
213         if (ins->logical < entry->logical)
214                 return -1;
215         else if (ins->logical > entry->logical)
216                 return 1;
217         return 0;
218 }
219
220 static void tree_insert(struct rb_root *root, struct rb_node *ins,
221                         int (*cmp)(struct rb_node *a, struct rb_node *b,
222                                    int fuzz))
223 {
224         struct rb_node ** p = &root->rb_node;
225         struct rb_node * parent = NULL;
226         int dir;
227
228         while(*p) {
229                 parent = *p;
230
231                 dir = cmp(*p, ins, 0);
232                 if (dir < 0)
233                         p = &(*p)->rb_left;
234                 else if (dir > 0)
235                         p = &(*p)->rb_right;
236                 else
237                         BUG();
238         }
239
240         rb_link_node(ins, parent, p);
241         rb_insert_color(ins, root);
242 }
243
244 static struct rb_node *tree_search(struct rb_root *root,
245                                    struct rb_node *search,
246                                    int (*cmp)(struct rb_node *a,
247                                               struct rb_node *b, int fuzz),
248                                    int fuzz)
249 {
250         struct rb_node *n = root->rb_node;
251         int dir;
252
253         while (n) {
254                 dir = cmp(n, search, fuzz);
255                 if (dir < 0)
256                         n = n->rb_left;
257                 else if (dir > 0)
258                         n = n->rb_right;
259                 else
260                         return n;
261         }
262
263         return NULL;
264 }
265
/*
 * Return a same-length substitute for @name whose crc32c matches the
 * original's, so checksummed directory indexes still resolve after
 * sanitizing.  Takes ownership of @name: it is stored in the cache
 * (md->name_tree) on success or freed on failure.  The returned buffer is
 * owned by the cache — the caller must NOT free it.  Returns NULL on
 * allocation failure.
 */
static char *find_collision(struct metadump_struct *md, char *name,
                            u32 name_len)
{
        struct name *val;
        struct rb_node *entry;
        struct name tmp;
        unsigned long checksum;
        int found = 0;
        int i;

        /* Reuse a previously computed substitution for this name, if any. */
        tmp.val = name;
        tmp.len = name_len;
        entry = tree_search(&md->name_tree, &tmp.n, name_cmp, 0);
        if (entry) {
                val = rb_entry(entry, struct name, n);
                free(name);
                return val->sub;
        }

        val = malloc(sizeof(struct name));
        if (!val) {
                fprintf(stderr, "Couldn't sanitize name, enomem\n");
                free(name);
                return NULL;
        }

        memset(val, 0, sizeof(*val));

        val->val = name;
        val->len = name_len;
        val->sub = malloc(name_len);
        if (!val->sub) {
                fprintf(stderr, "Couldn't sanitize name, enomem\n");
                free(val);
                free(name);
                return NULL;
        }

        /* NOTE(review): seed ~1 — presumably matches the btrfs name-hash
         * convention rather than the usual ~0 crc seed; confirm. */
        checksum = crc32c(~1, val->val, name_len);
        memset(val->sub, ' ', name_len);
        i = 0;
        /*
         * Odometer search: treat val->sub as a counter over the byte range
         * ' '..127 (skipping '/'), incrementing position 0 fastest, until
         * the crc32c collides and the bytes actually differ from val->val.
         */
        while (1) {
                if (crc32c(~1, val->sub, name_len) == checksum &&
                    memcmp(val->sub, val->val, val->len)) {
                        found = 1;
                        break;
                }

                if (val->sub[i] == 127) {
                        /* Carry: find the next position that can advance. */
                        do {
                                i++;
                                if (i >= name_len)
                                        break;
                        } while (val->sub[i] == 127);

                        /* Every position maxed out: search space exhausted. */
                        if (i >= name_len)
                                break;
                        val->sub[i]++;
                        if (val->sub[i] == '/')
                                val->sub[i]++;
                        memset(val->sub, ' ', i);
                        i = 0;
                        continue;
                } else {
                        val->sub[i]++;
                        if (val->sub[i] == '/')
                                val->sub[i]++;
                }
        }

        if (!found) {
                /* No collision found: fall back to random printable bytes. */
                fprintf(stderr, "Couldn't find a collision for '%.*s', "
                        "generating normal garbage, it won't match indexes\n",
                        val->len, val->val);
                for (i = 0; i < name_len; i++) {
                        char c = rand() % 94 + 33;

                        if (c == '/')
                                c++;
                        val->sub[i] = c;
                }
        }

        tree_insert(&md->name_tree, &val->n, name_cmp);
        return val->sub;
}
352
/*
 * Overwrite every filename stored in the dir item(s) at @slot with
 * same-length garbage.  With sanitize_names > 1 the replacement comes from
 * find_collision() and is owned by the name cache; otherwise it is freshly
 * allocated here and freed after use (free_garbage).  On allocation failure
 * the remaining names in the item are left untouched.
 */
static void sanitize_dir_item(struct metadump_struct *md, struct extent_buffer *eb,
                              int slot)
{
        struct btrfs_dir_item *dir_item;
        char *buf;
        char *garbage;
        unsigned long name_ptr;
        u32 total_len;
        u32 cur = 0;
        u32 this_len;
        u32 name_len;
        int free_garbage = (md->sanitize_names == 1);

        dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
        total_len = btrfs_item_size_nr(eb, slot);
        /* A single item may pack several dir entries; walk all of them. */
        while (cur < total_len) {
                this_len = sizeof(*dir_item) +
                        btrfs_dir_name_len(eb, dir_item) +
                        btrfs_dir_data_len(eb, dir_item);
                name_ptr = (unsigned long)(dir_item + 1);
                name_len = btrfs_dir_name_len(eb, dir_item);

                if (md->sanitize_names > 1) {
                        buf = malloc(name_len);
                        if (!buf) {
                                fprintf(stderr, "Couldn't sanitize name, "
                                        "enomem\n");
                                return;
                        }
                        read_extent_buffer(eb, buf, name_ptr, name_len);
                        /* find_collision() takes ownership of buf. */
                        garbage = find_collision(md, buf, name_len);
                } else {
                        garbage = generate_garbage(name_len);
                }
                if (!garbage) {
                        fprintf(stderr, "Couldn't sanitize name, enomem\n");
                        return;
                }
                write_extent_buffer(eb, garbage, name_ptr, name_len);
                cur += this_len;
                /* Advance to the next packed entry inside the same item. */
                dir_item = (struct btrfs_dir_item *)((char *)dir_item +
                                                     this_len);
                if (free_garbage)
                        free(garbage);
        }
}
399
/*
 * Overwrite the filename in every inode ref (@ext == 0) or extref
 * (@ext != 0) packed into item @slot with same-length garbage.  Ownership
 * of the replacement follows the same rules as sanitize_dir_item().
 */
static void sanitize_inode_ref(struct metadump_struct *md,
                               struct extent_buffer *eb, int slot, int ext)
{
        struct btrfs_inode_extref *extref;
        struct btrfs_inode_ref *ref;
        char *garbage, *buf;
        unsigned long ptr;
        unsigned long name_ptr;
        u32 item_size;
        u32 cur_offset = 0;
        int len;
        int free_garbage = (md->sanitize_names == 1);

        item_size = btrfs_item_size_nr(eb, slot);
        ptr = btrfs_item_ptr_offset(eb, slot);
        /* Multiple refs can be packed into one item; walk them all. */
        while (cur_offset < item_size) {
                if (ext) {
                        extref = (struct btrfs_inode_extref *)(ptr +
                                                               cur_offset);
                        name_ptr = (unsigned long)(&extref->name);
                        len = btrfs_inode_extref_name_len(eb, extref);
                        cur_offset += sizeof(*extref);
                } else {
                        ref = (struct btrfs_inode_ref *)(ptr + cur_offset);
                        len = btrfs_inode_ref_name_len(eb, ref);
                        name_ptr = (unsigned long)(ref + 1);
                        cur_offset += sizeof(*ref);
                }
                /* The variable-length name follows the fixed struct. */
                cur_offset += len;

                if (md->sanitize_names > 1) {
                        buf = malloc(len);
                        if (!buf) {
                                fprintf(stderr, "Couldn't sanitize name, "
                                        "enomem\n");
                                return;
                        }
                        read_extent_buffer(eb, buf, name_ptr, len);
                        /* find_collision() takes ownership of buf. */
                        garbage = find_collision(md, buf, len);
                } else {
                        garbage = generate_garbage(len);
                }

                if (!garbage) {
                        fprintf(stderr, "Couldn't sanitize name, enomem\n");
                        return;
                }
                write_extent_buffer(eb, garbage, name_ptr, len);
                if (free_garbage)
                        free(garbage);
        }
}
452
453 static void sanitize_xattr(struct metadump_struct *md,
454                            struct extent_buffer *eb, int slot)
455 {
456         struct btrfs_dir_item *dir_item;
457         unsigned long data_ptr;
458         u32 data_len;
459
460         dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
461         data_len = btrfs_dir_data_len(eb, dir_item);
462
463         data_ptr = (unsigned long)((char *)(dir_item + 1) +
464                                    btrfs_dir_name_len(eb, dir_item));
465         memset_extent_buffer(eb, 0, data_ptr, data_len);
466 }
467
/*
 * Sanitize the names held in item @slot, writing the result into @dst (a
 * raw byte copy of the block @src).  The work happens on a temporary dummy
 * extent buffer so the regular btrfs accessors can be used; the modified
 * bytes are then copied back into @dst.  @src itself is never modified.
 */
static void sanitize_name(struct metadump_struct *md, u8 *dst,
                          struct extent_buffer *src, struct btrfs_key *key,
                          int slot)
{
        struct extent_buffer *eb;

        eb = alloc_dummy_eb(src->start, src->len);
        if (!eb) {
                fprintf(stderr, "Couldn't sanitize name, no memory\n");
                return;
        }

        /* Start from the (possibly already scrubbed) destination copy. */
        memcpy(eb->data, dst, eb->len);

        switch (key->type) {
        case BTRFS_DIR_ITEM_KEY:
        case BTRFS_DIR_INDEX_KEY:
                sanitize_dir_item(md, eb, slot);
                break;
        case BTRFS_INODE_REF_KEY:
                sanitize_inode_ref(md, eb, slot, 0);
                break;
        case BTRFS_INODE_EXTREF_KEY:
                sanitize_inode_ref(md, eb, slot, 1);
                break;
        case BTRFS_XATTR_ITEM_KEY:
                sanitize_xattr(md, eb, slot);
                break;
        default:
                break;
        }

        memcpy(dst, eb->data, eb->len);
        /* NOTE(review): plain free() — assumes alloc_dummy_eb() returns one
         * flat allocation with no refcounting; confirm against its
         * definition. */
        free(eb);
}
503
/*
 * Zero inline file data and checksum items in @dst (a raw copy of the leaf
 * @src), and sanitize filenames when requested.  The image only needs
 * metadata structure, not file contents.
 */
static void zero_items(struct metadump_struct *md, u8 *dst,
                       struct extent_buffer *src)
{
        struct btrfs_file_extent_item *fi;
        struct btrfs_item *item;
        struct btrfs_key key;
        u32 nritems = btrfs_header_nritems(src);
        size_t size;
        unsigned long ptr;
        int i, extent_type;

        for (i = 0; i < nritems; i++) {
                item = btrfs_item_nr(i);
                btrfs_item_key_to_cpu(src, &key, i);
                /* Data checksums reveal nothing useful; wipe whole item. */
                if (key.type == BTRFS_CSUM_ITEM_KEY) {
                        size = btrfs_item_size_nr(src, i);
                        memset(dst + btrfs_leaf_data(src) +
                               btrfs_item_offset_nr(src, i), 0, size);
                        continue;
                }

                if (md->sanitize_names && has_name(&key)) {
                        sanitize_name(md, dst, src, &key, i);
                        continue;
                }

                if (key.type != BTRFS_EXTENT_DATA_KEY)
                        continue;

                /* Only inline extents carry file data inside the leaf. */
                fi = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
                extent_type = btrfs_file_extent_type(src, fi);
                if (extent_type != BTRFS_FILE_EXTENT_INLINE)
                        continue;

                ptr = btrfs_file_extent_inline_start(fi);
                size = btrfs_file_extent_inline_item_len(src, item);
                memset(dst + ptr, 0, size);
        }
}
546
/*
 * Copy the metadata block @src into @dst, zero everything the image does
 * not need (slack space between items, csums, inline file data, optionally
 * names) and re-checksum the result.  Superblocks are copied verbatim.
 */
static void copy_buffer(struct metadump_struct *md, u8 *dst,
                        struct extent_buffer *src)
{
        int level;
        size_t size;
        u32 nritems;

        memcpy(dst, src->data, src->len);
        if (src->start == BTRFS_SUPER_INFO_OFFSET)
                return;

        level = btrfs_header_level(src);
        nritems = btrfs_header_nritems(src);

        if (nritems == 0) {
                /* Empty block: keep only the header. */
                size = sizeof(struct btrfs_header);
                memset(dst + size, 0, src->len - size);
        } else if (level == 0) {
                /*
                 * Leaf: item data grows down from the end, item headers grow
                 * up from the front — zero the gap between them, then scrub
                 * the item contents.
                 */
                size = btrfs_leaf_data(src) +
                        btrfs_item_offset_nr(src, nritems - 1) -
                        btrfs_item_nr_offset(nritems);
                memset(dst + btrfs_item_nr_offset(nritems), 0, size);
                zero_items(md, dst, src);
        } else {
                /* Node: zero the slack after the last key pointer. */
                size = offsetof(struct btrfs_node, ptrs) +
                        sizeof(struct btrfs_key_ptr) * nritems;
                memset(dst + size, 0, src->len - size);
        }
        csum_block(dst, src->len);
}
580
/*
 * Compression worker thread.  Pulls async_work items off md->list and, when
 * compression is enabled, replaces each item's buffer with a zlib-compressed
 * copy.  Exits once md->done is set and the queue is drained.
 *
 * Fix: the malloc() of the compression buffer was unchecked — on failure
 * compress2() would write through a NULL pointer and the original buffer
 * leaked.  Now the uncompressed buffer is kept and the item is flagged via
 * async->error, the same way a zlib failure is reported.
 */
static void *dump_worker(void *data)
{
        struct metadump_struct *md = (struct metadump_struct *)data;
        struct async_work *async;
        int ret;

        while (1) {
                pthread_mutex_lock(&md->mutex);
                while (list_empty(&md->list)) {
                        if (md->done) {
                                pthread_mutex_unlock(&md->mutex);
                                goto out;
                        }
                        pthread_cond_wait(&md->cond, &md->mutex);
                }
                async = list_entry(md->list.next, struct async_work, list);
                list_del_init(&async->list);
                pthread_mutex_unlock(&md->mutex);

                if (md->compress_level > 0) {
                        u8 *orig = async->buffer;

                        async->bufsize = compressBound(async->size);
                        async->buffer = malloc(async->bufsize);
                        if (!async->buffer) {
                                /*
                                 * No memory for the compressed copy: keep
                                 * the uncompressed data so nothing is NULL
                                 * or leaked, and flag the item.
                                 */
                                async->buffer = orig;
                                async->bufsize = async->size;
                                async->error = 1;
                        } else {
                                ret = compress2(async->buffer,
                                                (unsigned long *)&async->bufsize,
                                                orig, async->size,
                                                md->compress_level);
                                if (ret != Z_OK)
                                        async->error = 1;
                                free(orig);
                        }
                }

                pthread_mutex_lock(&md->mutex);
                md->num_ready++;
                pthread_mutex_unlock(&md->mutex);
        }
out:
        pthread_exit(NULL);
}
623
624 static void meta_cluster_init(struct metadump_struct *md, u64 start)
625 {
626         struct meta_cluster_header *header;
627
628         md->num_items = 0;
629         md->num_ready = 0;
630         header = &md->cluster->header;
631         header->magic = cpu_to_le64(HEADER_MAGIC);
632         header->bytenr = cpu_to_le64(start);
633         header->nritems = cpu_to_le32(0);
634         header->compress = md->compress_level > 0 ?
635                            COMPRESS_ZLIB : COMPRESS_NONE;
636 }
637
638 static void metadump_destroy(struct metadump_struct *md, int num_threads)
639 {
640         int i;
641         struct rb_node *n;
642
643         pthread_mutex_lock(&md->mutex);
644         md->done = 1;
645         pthread_cond_broadcast(&md->cond);
646         pthread_mutex_unlock(&md->mutex);
647
648         for (i = 0; i < num_threads; i++)
649                 pthread_join(md->threads[i], NULL);
650
651         pthread_cond_destroy(&md->cond);
652         pthread_mutex_destroy(&md->mutex);
653
654         while ((n = rb_first(&md->name_tree))) {
655                 struct name *name;
656
657                 name = rb_entry(n, struct name, n);
658                 rb_erase(n, &md->name_tree);
659                 free(name->val);
660                 free(name->sub);
661                 free(name);
662         }
663         free(md->threads);
664         free(md->cluster);
665 }
666
/*
 * Initialize @md for dumping @root to @out and start @num_threads
 * compression workers (0 means no worker pool).  Returns 0 on success or a
 * negative errno; on failure everything acquired so far is released.
 */
static int metadump_init(struct metadump_struct *md, struct btrfs_root *root,
                         FILE *out, int num_threads, int compress_level,
                         int sanitize_names)
{
        int i, ret = 0;

        memset(md, 0, sizeof(*md));
        pthread_cond_init(&md->cond, NULL);
        pthread_mutex_init(&md->mutex, NULL);
        INIT_LIST_HEAD(&md->list);
        INIT_LIST_HEAD(&md->ordered);
        md->root = root;
        md->out = out;
        md->pending_start = (u64)-1;
        md->compress_level = compress_level;
        md->cluster = calloc(1, BLOCK_SIZE);
        md->sanitize_names = sanitize_names;
        /* Collision search hashes heavily; enable accelerated crc32c. */
        if (sanitize_names > 1)
                crc32c_optimization_init();

        if (!md->cluster) {
                pthread_cond_destroy(&md->cond);
                pthread_mutex_destroy(&md->mutex);
                return -ENOMEM;
        }

        meta_cluster_init(md, 0);
        if (!num_threads)
                return 0;

        /* Redundant after the memset above, but harmless. */
        md->name_tree.rb_node = NULL;
        md->num_threads = num_threads;
        md->threads = calloc(num_threads, sizeof(pthread_t));
        if (!md->threads) {
                free(md->cluster);
                pthread_cond_destroy(&md->cond);
                pthread_mutex_destroy(&md->mutex);
                return -ENOMEM;
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_create(md->threads + i, NULL, dump_worker, md);
                if (ret)
                        break;
        }

        /*
         * NOTE(review): this joins i + 1 threads, but threads[i] was never
         * created when pthread_create() failed — confirm that joining the
         * zero-initialized pthread_t is harmless here.
         */
        if (ret)
                metadump_destroy(md, i + 1);

        return ret;
}
718
719 static int write_zero(FILE *out, size_t size)
720 {
721         static char zero[BLOCK_SIZE];
722         return fwrite(zero, size, 1, out);
723 }
724
/*
 * Flush the current cluster to md->out: wait until the workers have
 * finished every queued buffer, write the BLOCK_SIZE index block, then the
 * buffers in submission order, then pad up to the next BLOCK_SIZE boundary.
 * Must be called with md->mutex held; the lock is dropped while polling.
 * *next receives the stream offset just past this cluster (0 if the
 * cluster was empty).  Returns 0 or -EIO.
 */
static int write_buffers(struct metadump_struct *md, u64 *next)
{
        struct meta_cluster_header *header = &md->cluster->header;
        struct meta_cluster_item *item;
        struct async_work *async;
        u64 bytenr = 0;
        u32 nritems = 0;
        int ret;
        int err = 0;

        if (list_empty(&md->ordered))
                goto out;

        /* wait until all buffers are compressed */
        while (md->num_items > md->num_ready) {
                struct timespec ts = {
                        .tv_sec = 0,
                        .tv_nsec = 10000000,
                };
                /* Poll every 10ms; workers need the mutex to progress. */
                pthread_mutex_unlock(&md->mutex);
                nanosleep(&ts, NULL);
                pthread_mutex_lock(&md->mutex);
        }

        /* setup and write index block */
        list_for_each_entry(async, &md->ordered, ordered) {
                item = md->cluster->items + nritems;
                item->bytenr = cpu_to_le64(async->start);
                item->size = cpu_to_le32(async->bufsize);
                nritems++;
        }
        header->nritems = cpu_to_le32(nritems);

        ret = fwrite(md->cluster, BLOCK_SIZE, 1, md->out);
        if (ret != 1) {
                fprintf(stderr, "Error writing out cluster: %d\n", errno);
                return -EIO;
        }

        /* write buffers */
        bytenr += le64_to_cpu(header->bytenr) + BLOCK_SIZE;
        while (!list_empty(&md->ordered)) {
                async = list_entry(md->ordered.next, struct async_work,
                                   ordered);
                list_del_init(&async->ordered);

                /* Keep counting bytes even after a failure so *next stays
                 * consistent with the index we already wrote. */
                bytenr += async->bufsize;
                if (!err)
                        ret = fwrite(async->buffer, async->bufsize, 1,
                                     md->out);
                /* NOTE(review): once err is set, ret stays 0 and this
                 * message repeats for every remaining buffer. */
                if (ret != 1) {
                        err = -EIO;
                        ret = 0;
                        fprintf(stderr, "Error writing out cluster: %d\n",
                                errno);
                }

                free(async->buffer);
                free(async);
        }

        /* zero unused space in the last block */
        if (!err && bytenr & BLOCK_MASK) {
                size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);

                bytenr += size;
                ret = write_zero(md->out, size);
                if (ret != 1) {
                        fprintf(stderr, "Error zeroing out buffer: %d\n",
                                errno);
                        err = -EIO;
                }
        }
out:
        *next = bytenr;
        return err;
}
802
803 static int read_data_extent(struct metadump_struct *md,
804                             struct async_work *async)
805 {
806         struct btrfs_multi_bio *multi = NULL;
807         struct btrfs_device *device;
808         u64 bytes_left = async->size;
809         u64 logical = async->start;
810         u64 offset = 0;
811         u64 bytenr;
812         u64 read_len;
813         ssize_t done;
814         int fd;
815         int ret;
816
817         while (bytes_left) {
818                 read_len = bytes_left;
819                 ret = btrfs_map_block(&md->root->fs_info->mapping_tree, READ,
820                                       logical, &read_len, &multi, 0, NULL);
821                 if (ret) {
822                         fprintf(stderr, "Couldn't map data block %d\n", ret);
823                         return ret;
824                 }
825
826                 device = multi->stripes[0].dev;
827
828                 if (device->fd == 0) {
829                         fprintf(stderr,
830                                 "Device we need to read from is not open\n");
831                         free(multi);
832                         return -EIO;
833                 }
834                 fd = device->fd;
835                 bytenr = multi->stripes[0].physical;
836                 free(multi);
837
838                 read_len = min(read_len, bytes_left);
839                 done = pread64(fd, async->buffer+offset, read_len, bytenr);
840                 if (done < read_len) {
841                         if (done < 0)
842                                 fprintf(stderr, "Error reading extent %d\n",
843                                         errno);
844                         else
845                                 fprintf(stderr, "Short read\n");
846                         return -EIO;
847                 }
848
849                 bytes_left -= done;
850                 offset += done;
851                 logical += done;
852         }
853
854         return 0;
855 }
856
/*
 * Package the coalesced pending range into an async_work item and queue
 * it: data extents are read raw from disk, metadata is read block by block
 * and scrubbed via copy_buffer().  When the cluster is full, or @done
 * forces a final flush, the cluster is written out.  Returns 0 or a
 * negative errno.
 */
static int flush_pending(struct metadump_struct *md, int done)
{
        struct async_work *async = NULL;
        struct extent_buffer *eb;
        u64 blocksize = md->root->nodesize;
        u64 start;
        u64 size;
        size_t offset;
        int ret = 0;

        if (md->pending_size) {
                async = calloc(1, sizeof(*async));
                if (!async)
                        return -ENOMEM;

                async->start = md->pending_start;
                async->size = md->pending_size;
                async->bufsize = async->size;
                async->buffer = malloc(async->bufsize);
                if (!async->buffer) {
                        free(async);
                        return -ENOMEM;
                }
                offset = 0;
                start = async->start;
                size = async->size;

                if (md->data) {
                        ret = read_data_extent(md, async);
                        if (ret) {
                                free(async->buffer);
                                free(async);
                                return ret;
                        }
                }

                /* Metadata: copy one tree block at a time, sanitizing. */
                while (!md->data && size > 0) {
                        u64 this_read = min(blocksize, size);
                        eb = read_tree_block(md->root, start, this_read, 0);
                        if (!eb) {
                                free(async->buffer);
                                free(async);
                                fprintf(stderr,
                                        "Error reading metadata block\n");
                                return -EIO;
                        }
                        copy_buffer(md, async->buffer + offset, eb);
                        free_extent_buffer(eb);
                        start += this_read;
                        offset += this_read;
                        size -= this_read;
                }

                md->pending_start = (u64)-1;
                md->pending_size = 0;
        } else if (!done) {
                return 0;
        }

        pthread_mutex_lock(&md->mutex);
        if (async) {
                list_add_tail(&async->ordered, &md->ordered);
                md->num_items++;
                if (md->compress_level > 0) {
                        /* Hand the buffer to a compression worker. */
                        list_add_tail(&async->list, &md->list);
                        pthread_cond_signal(&md->cond);
                } else {
                        md->num_ready++;
                }
        }
        if (md->num_items >= ITEMS_PER_CLUSTER || done) {
                /* write_buffers() always sets start (the next offset). */
                ret = write_buffers(md, &start);
                if (ret)
                        fprintf(stderr, "Error writing buffers %d\n",
                                errno);
                else
                        meta_cluster_init(md, start);
        }
        pthread_mutex_unlock(&md->mutex);
        return ret;
}
938
939 static int add_extent(u64 start, u64 size, struct metadump_struct *md,
940                       int data)
941 {
942         int ret;
943         if (md->data != data ||
944             md->pending_size + size > MAX_PENDING_SIZE ||
945             md->pending_start + md->pending_size != start) {
946                 ret = flush_pending(md, 0);
947                 if (ret)
948                         return ret;
949                 md->pending_start = start;
950         }
951         readahead_tree_block(md->root, start, size, 0);
952         md->pending_size += size;
953         md->data = data;
954         return 0;
955 }
956
957 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Decide whether the extent at @bytenr is a tree block by inspecting
 * its V0-format back references (compat extent tree layout).
 *
 * @path must point at the extent item for @bytenr; the refs follow it
 * in key order.  Returns 1 if a ref with an objectid below
 * BTRFS_FIRST_FREE_OBJECTID (i.e. a tree owner) is found, 0 if not,
 * or a negative errno if walking to the next leaf fails.
 */
static int is_tree_block(struct btrfs_root *extent_root,
                         struct btrfs_path *path, u64 bytenr)
{
        struct extent_buffer *leaf;
        struct btrfs_key key;
        u64 ref_objectid;
        int ret;

        leaf = path->nodes[0];
        while (1) {
                struct btrfs_extent_ref_v0 *ref_item;
                /* Step past the extent item to its back references. */
                path->slots[0]++;
                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(extent_root, path);
                        if (ret < 0)
                                return ret;
                        if (ret > 0)
                                break;
                        leaf = path->nodes[0];
                }
                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                /* A different objectid means we ran off this extent's refs. */
                if (key.objectid != bytenr)
                        break;
                if (key.type != BTRFS_EXTENT_REF_V0_KEY)
                        continue;
                ref_item = btrfs_item_ptr(leaf, path->slots[0],
                                          struct btrfs_extent_ref_v0);
                ref_objectid = btrfs_ref_objectid_v0(leaf, ref_item);
                /* Low (reserved) objectids identify tree owners. */
                if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID)
                        return 1;
                break;
        }
        return 0;
}
992 #endif
993
994 static int copy_tree_blocks(struct btrfs_root *root, struct extent_buffer *eb,
995                             struct metadump_struct *metadump, int root_tree)
996 {
997         struct extent_buffer *tmp;
998         struct btrfs_root_item *ri;
999         struct btrfs_key key;
1000         u64 bytenr;
1001         int level;
1002         int nritems = 0;
1003         int i = 0;
1004         int ret;
1005
1006         ret = add_extent(btrfs_header_bytenr(eb), root->leafsize, metadump, 0);
1007         if (ret) {
1008                 fprintf(stderr, "Error adding metadata block\n");
1009                 return ret;
1010         }
1011
1012         if (btrfs_header_level(eb) == 0 && !root_tree)
1013                 return 0;
1014
1015         level = btrfs_header_level(eb);
1016         nritems = btrfs_header_nritems(eb);
1017         for (i = 0; i < nritems; i++) {
1018                 if (level == 0) {
1019                         btrfs_item_key_to_cpu(eb, &key, i);
1020                         if (key.type != BTRFS_ROOT_ITEM_KEY)
1021                                 continue;
1022                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
1023                         bytenr = btrfs_disk_root_bytenr(eb, ri);
1024                         tmp = read_tree_block(root, bytenr, root->leafsize, 0);
1025                         if (!tmp) {
1026                                 fprintf(stderr,
1027                                         "Error reading log root block\n");
1028                                 return -EIO;
1029                         }
1030                         ret = copy_tree_blocks(root, tmp, metadump, 0);
1031                         free_extent_buffer(tmp);
1032                         if (ret)
1033                                 return ret;
1034                 } else {
1035                         bytenr = btrfs_node_blockptr(eb, i);
1036                         tmp = read_tree_block(root, bytenr, root->leafsize, 0);
1037                         if (!tmp) {
1038                                 fprintf(stderr, "Error reading log block\n");
1039                                 return -EIO;
1040                         }
1041                         ret = copy_tree_blocks(root, tmp, metadump, root_tree);
1042                         free_extent_buffer(tmp);
1043                         if (ret)
1044                                 return ret;
1045                 }
1046         }
1047
1048         return 0;
1049 }
1050
1051 static int copy_log_trees(struct btrfs_root *root,
1052                           struct metadump_struct *metadump,
1053                           struct btrfs_path *path)
1054 {
1055         u64 blocknr = btrfs_super_log_root(root->fs_info->super_copy);
1056
1057         if (blocknr == 0)
1058                 return 0;
1059
1060         if (!root->fs_info->log_root_tree ||
1061             !root->fs_info->log_root_tree->node) {
1062                 fprintf(stderr, "Error copying tree log, it wasn't setup\n");
1063                 return -EIO;
1064         }
1065
1066         return copy_tree_blocks(root, root->fs_info->log_root_tree->node,
1067                                 metadump, 1);
1068 }
1069
1070 static int copy_space_cache(struct btrfs_root *root,
1071                             struct metadump_struct *metadump,
1072                             struct btrfs_path *path)
1073 {
1074         struct extent_buffer *leaf;
1075         struct btrfs_file_extent_item *fi;
1076         struct btrfs_key key;
1077         u64 bytenr, num_bytes;
1078         int ret;
1079
1080         root = root->fs_info->tree_root;
1081
1082         key.objectid = 0;
1083         key.type = BTRFS_EXTENT_DATA_KEY;
1084         key.offset = 0;
1085
1086         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1087         if (ret < 0) {
1088                 fprintf(stderr, "Error searching for free space inode %d\n",
1089                         ret);
1090                 return ret;
1091         }
1092
1093         while (1) {
1094                 leaf = path->nodes[0];
1095                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1096                         ret = btrfs_next_leaf(root, path);
1097                         if (ret < 0) {
1098                                 fprintf(stderr, "Error going to next leaf "
1099                                         "%d\n", ret);
1100                                 return ret;
1101                         }
1102                         if (ret > 0)
1103                                 break;
1104                         leaf = path->nodes[0];
1105                 }
1106
1107                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1108                 if (key.type != BTRFS_EXTENT_DATA_KEY) {
1109                         path->slots[0]++;
1110                         continue;
1111                 }
1112
1113                 fi = btrfs_item_ptr(leaf, path->slots[0],
1114                                     struct btrfs_file_extent_item);
1115                 if (btrfs_file_extent_type(leaf, fi) !=
1116                     BTRFS_FILE_EXTENT_REG) {
1117                         path->slots[0]++;
1118                         continue;
1119                 }
1120
1121                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1122                 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
1123                 ret = add_extent(bytenr, num_bytes, metadump, 1);
1124                 if (ret) {
1125                         fprintf(stderr, "Error adding space cache blocks %d\n",
1126                                 ret);
1127                         btrfs_release_path(path);
1128                         return ret;
1129                 }
1130                 path->slots[0]++;
1131         }
1132
1133         return 0;
1134 }
1135
/*
 * Walk the extent tree and queue every tree block extent for dumping.
 *
 * Starts just past the primary super block and scans EXTENT_ITEM /
 * METADATA_ITEM entries, adding only extents flagged as tree blocks
 * (data extents are skipped).  Returns 0 on success or a negative
 * errno on search/iteration failure.
 */
static int copy_from_extent_tree(struct metadump_struct *metadump,
                                 struct btrfs_path *path)
{
        struct btrfs_root *extent_root;
        struct extent_buffer *leaf;
        struct btrfs_extent_item *ei;
        struct btrfs_key key;
        u64 bytenr;
        u64 num_bytes;
        int ret;

        extent_root = metadump->root->fs_info->extent_root;
        /* Skip the super block itself; 4096 is the super block size. */
        bytenr = BTRFS_SUPER_INFO_OFFSET + 4096;
        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = 0;

        ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
        if (ret < 0) {
                fprintf(stderr, "Error searching extent root %d\n", ret);
                return ret;
        }
        ret = 0;

        while (1) {
                leaf = path->nodes[0];
                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(extent_root, path);
                        if (ret < 0) {
                                fprintf(stderr, "Error going to next leaf %d"
                                        "\n", ret);
                                break;
                        }
                        if (ret > 0) {
                                /* End of the extent tree: clean finish. */
                                ret = 0;
                                break;
                        }
                        leaf = path->nodes[0];
                }

                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                /* Skip anything before our cursor and non-extent keys. */
                if (key.objectid < bytenr ||
                    (key.type != BTRFS_EXTENT_ITEM_KEY &&
                     key.type != BTRFS_METADATA_ITEM_KEY)) {
                        path->slots[0]++;
                        continue;
                }

                bytenr = key.objectid;
                /* A METADATA_ITEM's offset is not a byte count; the
                 * extent is exactly one metadata block. */
                if (key.type == BTRFS_METADATA_ITEM_KEY)
                        num_bytes = extent_root->leafsize;
                else
                        num_bytes = key.offset;

                if (btrfs_item_size_nr(leaf, path->slots[0]) > sizeof(*ei)) {
                        /* Modern extent item: flags tell us if it is a
                         * tree block. */
                        ei = btrfs_item_ptr(leaf, path->slots[0],
                                            struct btrfs_extent_item);
                        if (btrfs_extent_flags(leaf, ei) &
                            BTRFS_EXTENT_FLAG_TREE_BLOCK) {
                                ret = add_extent(bytenr, num_bytes, metadump,
                                                 0);
                                if (ret) {
                                        fprintf(stderr, "Error adding block "
                                                "%d\n", ret);
                                        break;
                                }
                        }
                } else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
                        /* V0 extent item carries no flags; inspect the
                         * back references instead. */
                        ret = is_tree_block(extent_root, path, bytenr);
                        if (ret < 0) {
                                fprintf(stderr, "Error checking tree block "
                                        "%d\n", ret);
                                break;
                        }

                        if (ret) {
                                ret = add_extent(bytenr, num_bytes, metadump,
                                                 0);
                                if (ret) {
                                        fprintf(stderr, "Error adding block "
                                                "%d\n", ret);
                                        break;
                                }
                        }
                        ret = 0;
#else
                        fprintf(stderr, "Either extent tree corruption or "
                                "you haven't built with V0 support\n");
                        ret = -EIO;
                        break;
#endif
                }
                bytenr += num_bytes;
        }

        btrfs_release_path(path);

        return ret;
}
1236
1237 static int create_metadump(const char *input, FILE *out, int num_threads,
1238                            int compress_level, int sanitize, int walk_trees)
1239 {
1240         struct btrfs_root *root;
1241         struct btrfs_path *path = NULL;
1242         struct metadump_struct metadump;
1243         int ret;
1244         int err = 0;
1245
1246         root = open_ctree(input, 0, 0);
1247         if (!root) {
1248                 fprintf(stderr, "Open ctree failed\n");
1249                 return -EIO;
1250         }
1251
1252         BUG_ON(root->nodesize != root->leafsize);
1253
1254         ret = metadump_init(&metadump, root, out, num_threads,
1255                             compress_level, sanitize);
1256         if (ret) {
1257                 fprintf(stderr, "Error initing metadump %d\n", ret);
1258                 close_ctree(root);
1259                 return ret;
1260         }
1261
1262         ret = add_extent(BTRFS_SUPER_INFO_OFFSET, 4096, &metadump, 0);
1263         if (ret) {
1264                 fprintf(stderr, "Error adding metadata %d\n", ret);
1265                 err = ret;
1266                 goto out;
1267         }
1268
1269         path = btrfs_alloc_path();
1270         if (!path) {
1271                 fprintf(stderr, "Out of memory allocing path\n");
1272                 err = -ENOMEM;
1273                 goto out;
1274         }
1275
1276         if (walk_trees) {
1277                 ret = copy_tree_blocks(root, root->fs_info->chunk_root->node,
1278                                        &metadump, 1);
1279                 if (ret) {
1280                         err = ret;
1281                         goto out;
1282                 }
1283
1284                 ret = copy_tree_blocks(root, root->fs_info->tree_root->node,
1285                                        &metadump, 1);
1286                 if (ret) {
1287                         err = ret;
1288                         goto out;
1289                 }
1290         } else {
1291                 ret = copy_from_extent_tree(&metadump, path);
1292                 if (ret) {
1293                         err = ret;
1294                         goto out;
1295                 }
1296         }
1297
1298         ret = copy_log_trees(root, &metadump, path);
1299         if (ret) {
1300                 err = ret;
1301                 goto out;
1302         }
1303
1304         ret = copy_space_cache(root, &metadump, path);
1305 out:
1306         ret = flush_pending(&metadump, 1);
1307         if (ret) {
1308                 if (!err)
1309                         err = ret;
1310                 fprintf(stderr, "Error flushing pending %d\n", ret);
1311         }
1312
1313         metadump_destroy(&metadump, num_threads);
1314
1315         btrfs_free_path(path);
1316         ret = close_ctree(root);
1317         return err ? err : ret;
1318 }
1319
1320 static void update_super_old(u8 *buffer)
1321 {
1322         struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
1323         struct btrfs_chunk *chunk;
1324         struct btrfs_disk_key *key;
1325         u32 sectorsize = btrfs_super_sectorsize(super);
1326         u64 flags = btrfs_super_flags(super);
1327
1328         flags |= BTRFS_SUPER_FLAG_METADUMP;
1329         btrfs_set_super_flags(super, flags);
1330
1331         key = (struct btrfs_disk_key *)(super->sys_chunk_array);
1332         chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
1333                                        sizeof(struct btrfs_disk_key));
1334
1335         btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
1336         btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
1337         btrfs_set_disk_key_offset(key, 0);
1338
1339         btrfs_set_stack_chunk_length(chunk, (u64)-1);
1340         btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
1341         btrfs_set_stack_chunk_stripe_len(chunk, BTRFS_STRIPE_LEN);
1342         btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
1343         btrfs_set_stack_chunk_io_align(chunk, sectorsize);
1344         btrfs_set_stack_chunk_io_width(chunk, sectorsize);
1345         btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
1346         btrfs_set_stack_chunk_num_stripes(chunk, 1);
1347         btrfs_set_stack_chunk_sub_stripes(chunk, 0);
1348         chunk->stripe.devid = super->dev_item.devid;
1349         btrfs_set_stack_stripe_offset(&chunk->stripe, 0);
1350         memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
1351         btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
1352         csum_block(buffer, 4096);
1353 }
1354
1355 static int update_super(u8 *buffer)
1356 {
1357         struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
1358         struct btrfs_chunk *chunk;
1359         struct btrfs_disk_key *disk_key;
1360         struct btrfs_key key;
1361         u32 new_array_size = 0;
1362         u32 array_size;
1363         u32 cur = 0;
1364         u8 *ptr, *write_ptr;
1365         int old_num_stripes;
1366
1367         write_ptr = ptr = super->sys_chunk_array;
1368         array_size = btrfs_super_sys_array_size(super);
1369
1370         while (cur < array_size) {
1371                 disk_key = (struct btrfs_disk_key *)ptr;
1372                 btrfs_disk_key_to_cpu(&key, disk_key);
1373
1374                 new_array_size += sizeof(*disk_key);
1375                 memmove(write_ptr, ptr, sizeof(*disk_key));
1376
1377                 write_ptr += sizeof(*disk_key);
1378                 ptr += sizeof(*disk_key);
1379                 cur += sizeof(*disk_key);
1380
1381                 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
1382                         chunk = (struct btrfs_chunk *)ptr;
1383                         old_num_stripes = btrfs_stack_chunk_num_stripes(chunk);
1384                         chunk = (struct btrfs_chunk *)write_ptr;
1385
1386                         memmove(write_ptr, ptr, sizeof(*chunk));
1387                         btrfs_set_stack_chunk_num_stripes(chunk, 1);
1388                         btrfs_set_stack_chunk_sub_stripes(chunk, 0);
1389                         btrfs_set_stack_chunk_type(chunk,
1390                                                    BTRFS_BLOCK_GROUP_SYSTEM);
1391                         chunk->stripe.devid = super->dev_item.devid;
1392                         memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid,
1393                                BTRFS_UUID_SIZE);
1394                         new_array_size += sizeof(*chunk);
1395                 } else {
1396                         fprintf(stderr, "Bogus key in the sys chunk array "
1397                                 "%d\n", key.type);
1398                         return -EIO;
1399                 }
1400                 write_ptr += sizeof(*chunk);
1401                 ptr += btrfs_chunk_item_size(old_num_stripes);
1402                 cur += btrfs_chunk_item_size(old_num_stripes);
1403         }
1404
1405         btrfs_set_super_sys_array_size(super, new_array_size);
1406         csum_block(buffer, 4096);
1407
1408         return 0;
1409 }
1410
1411 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size)
1412 {
1413         struct extent_buffer *eb;
1414
1415         eb = malloc(sizeof(struct extent_buffer) + size);
1416         if (!eb)
1417                 return NULL;
1418         memset(eb, 0, sizeof(struct extent_buffer) + size);
1419
1420         eb->start = bytenr;
1421         eb->len = size;
1422         return eb;
1423 }
1424
/*
 * Shrink the item in @slot of leaf @eb to @new_size bytes.
 *
 * Item data in a btrfs leaf grows downward from the end of the block,
 * so shrinking an item means sliding the data of this and all later
 * items toward the end of the leaf and bumping their offsets by the
 * difference.
 *
 * NOTE(review): assumes new_size <= old_size; size_diff is unsigned
 * and would wrap if a caller tried to grow an item — confirm all
 * callers only shrink.
 */
static void truncate_item(struct extent_buffer *eb, int slot, u32 new_size)
{
        struct btrfs_item *item;
        u32 nritems;
        u32 old_size;
        u32 old_data_start;
        u32 size_diff;
        u32 data_end;
        int i;

        old_size = btrfs_item_size_nr(eb, slot);
        if (old_size == new_size)
                return;

        nritems = btrfs_header_nritems(eb);
        /* Lowest used data offset: the last item's data starts here. */
        data_end = btrfs_item_offset_nr(eb, nritems - 1);

        old_data_start = btrfs_item_offset_nr(eb, slot);
        size_diff = old_size - new_size;

        /* Shift the offsets of this item and everything after it. */
        for (i = slot; i < nritems; i++) {
                u32 ioff;
                item = btrfs_item_nr(i);
                ioff = btrfs_item_offset(eb, item);
                btrfs_set_item_offset(eb, item, ioff + size_diff);
        }

        /* Slide the data region [data_end, old_data_start + new_size)
         * up by size_diff to close the gap. */
        memmove_extent_buffer(eb, btrfs_leaf_data(eb) + data_end + size_diff,
                              btrfs_leaf_data(eb) + data_end,
                              old_data_start + new_size - data_end);
        item = btrfs_item_nr(slot);
        btrfs_set_item_size(eb, item, new_size);
}
1458
/*
 * Rewrite chunk tree leaves inside a restored metadata buffer so that
 * every chunk item describes a single stripe on the restore target
 * device, and re-checksum each modified block.
 *
 * @buffer holds @size bytes starting at logical address async->start.
 * Buffers that are not a whole multiple of the metadata block size
 * (e.g. data extents) are left untouched.  Returns 0, or -ENOMEM if
 * the scratch extent buffer cannot be allocated.
 */
static int fixup_chunk_tree_block(struct mdrestore_struct *mdres,
                                  struct async_work *async, u8 *buffer,
                                  size_t size)
{
        struct extent_buffer *eb;
        size_t size_left = size;
        u64 bytenr = async->start;
        int i;

        /* Not metadata-block aligned: nothing to fix up here. */
        if (size_left % mdres->leafsize)
                return 0;

        eb = alloc_dummy_eb(bytenr, mdres->leafsize);
        if (!eb)
                return -ENOMEM;

        while (size_left) {
                eb->start = bytenr;
                memcpy(eb->data, buffer, mdres->leafsize);

                /* Stop scanning if the block doesn't self-identify:
                 * wrong bytenr or foreign fsid means this is not a
                 * valid metadata block of this filesystem. */
                if (btrfs_header_bytenr(eb) != bytenr)
                        break;
                if (memcmp(mdres->fsid,
                           eb->data + offsetof(struct btrfs_header, fsid),
                           BTRFS_FSID_SIZE))
                        break;

                /* Only chunk tree leaves carry chunk items to rewrite. */
                if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID)
                        goto next;

                if (btrfs_header_level(eb) != 0)
                        goto next;

                for (i = 0; i < btrfs_header_nritems(eb); i++) {
                        struct btrfs_chunk chunk;
                        struct btrfs_key key;
                        u64 type;

                        btrfs_item_key_to_cpu(eb, &key, i);
                        if (key.type != BTRFS_CHUNK_ITEM_KEY)
                                continue;
                        /* Drop the extra stripes from the item first. */
                        truncate_item(eb, i, sizeof(chunk));
                        read_extent_buffer(eb, &chunk,
                                           btrfs_item_ptr_offset(eb, i),
                                           sizeof(chunk));

                        /* Zero out the RAID profile */
                        type = btrfs_stack_chunk_type(&chunk);
                        type &= (BTRFS_BLOCK_GROUP_DATA |
                                 BTRFS_BLOCK_GROUP_SYSTEM |
                                 BTRFS_BLOCK_GROUP_METADATA |
                                 BTRFS_BLOCK_GROUP_DUP);
                        btrfs_set_stack_chunk_type(&chunk, type);

                        /* Point the single remaining stripe at the
                         * restore target device. */
                        btrfs_set_stack_chunk_num_stripes(&chunk, 1);
                        btrfs_set_stack_chunk_sub_stripes(&chunk, 0);
                        btrfs_set_stack_stripe_devid(&chunk.stripe, mdres->devid);
                        memcpy(chunk.stripe.dev_uuid, mdres->uuid,
                               BTRFS_UUID_SIZE);
                        write_extent_buffer(eb, &chunk,
                                            btrfs_item_ptr_offset(eb, i),
                                            sizeof(chunk));
                }
                /* Copy the edited block back and fix its checksum. */
                memcpy(buffer, eb->data, eb->len);
                csum_block(buffer, eb->len);
next:
                size_left -= mdres->leafsize;
                buffer += mdres->leafsize;
                bytenr += mdres->leafsize;
        }

        free(eb);
        return 0;
}
1533
1534 static void write_backup_supers(int fd, u8 *buf)
1535 {
1536         struct btrfs_super_block *super = (struct btrfs_super_block *)buf;
1537         struct stat st;
1538         u64 size;
1539         u64 bytenr;
1540         int i;
1541         int ret;
1542
1543         if (fstat(fd, &st)) {
1544                 fprintf(stderr, "Couldn't stat restore point, won't be able "
1545                         "to write backup supers: %d\n", errno);
1546                 return;
1547         }
1548
1549         size = btrfs_device_size(fd, &st);
1550
1551         for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1552                 bytenr = btrfs_sb_offset(i);
1553                 if (bytenr + 4096 > size)
1554                         break;
1555                 btrfs_set_super_bytenr(super, bytenr);
1556                 csum_block(buf, 4096);
1557                 ret = pwrite64(fd, buf, 4096, bytenr);
1558                 if (ret < 4096) {
1559                         if (ret < 0)
1560                                 fprintf(stderr, "Problem writing out backup "
1561                                         "super block %d, err %d\n", i, errno);
1562                         else
1563                                 fprintf(stderr, "Short write writing out "
1564                                         "backup super block\n");
1565                         break;
1566                 }
1567         }
1568 }
1569
1570 static u64 logical_to_physical(struct mdrestore_struct *mdres, u64 logical, u64 *size)
1571 {
1572         struct fs_chunk *fs_chunk;
1573         struct rb_node *entry;
1574         struct fs_chunk search;
1575         u64 offset;
1576
1577         if (logical == BTRFS_SUPER_INFO_OFFSET)
1578                 return logical;
1579
1580         search.logical = logical;
1581         entry = tree_search(&mdres->chunk_tree, &search.n, chunk_cmp, 1);
1582         if (!entry) {
1583                 if (mdres->in != stdin)
1584                         printf("Couldn't find a chunk, using logical\n");
1585                 return logical;
1586         }
1587         fs_chunk = rb_entry(entry, struct fs_chunk, n);
1588         if (fs_chunk->logical > logical || fs_chunk->logical + fs_chunk->bytes < logical)
1589                 BUG();
1590         offset = search.logical - fs_chunk->logical;
1591
1592         *size = min(*size, fs_chunk->bytes + fs_chunk->logical - logical);
1593         return fs_chunk->physical + offset;
1594 }
1595
/*
 * Worker thread for image restore: pull jobs off mdres->list until
 * mdres->done is set.
 *
 * For each async_work item the thread:
 *   1. optionally zlib-uncompresses the cluster payload,
 *   2. fixes up the super block / chunk tree blocks for single-device
 *      restore (unless old_restore or multi_devices),
 *   3. writes the payload to the output, translating logical to
 *      physical offsets chunk-by-chunk (or via write_data_to_disk in
 *      fixup_offset mode),
 *   4. mirrors the super block to the backup locations.
 *
 * Errors are recorded in mdres->error (first error wins) rather than
 * aborting, except the fixup_offset write path which exits outright.
 */
static void *restore_worker(void *data)
{
        struct mdrestore_struct *mdres = (struct mdrestore_struct *)data;
        struct async_work *async;
        size_t size;
        u8 *buffer;
        u8 *outbuf;
        int outfd;
        int ret;
        /* Worst-case decompressed size of one cluster payload. */
        int compress_size = MAX_PENDING_SIZE * 4;

        outfd = fileno(mdres->out);
        buffer = malloc(compress_size);
        if (!buffer) {
                fprintf(stderr, "Error allocing buffer\n");
                pthread_mutex_lock(&mdres->mutex);
                if (!mdres->error)
                        mdres->error = -ENOMEM;
                pthread_mutex_unlock(&mdres->mutex);
                goto out;
        }

        while (1) {
                u64 bytenr;
                off_t offset = 0;
                int err = 0;

                /* Wait until the chunk tree was parsed (leafsize set)
                 * and work is queued, or until told to stop. */
                pthread_mutex_lock(&mdres->mutex);
                while (!mdres->leafsize || list_empty(&mdres->list)) {
                        if (mdres->done) {
                                pthread_mutex_unlock(&mdres->mutex);
                                goto out;
                        }
                        pthread_cond_wait(&mdres->cond, &mdres->mutex);
                }
                async = list_entry(mdres->list.next, struct async_work, list);
                list_del_init(&async->list);
                pthread_mutex_unlock(&mdres->mutex);

                if (mdres->compress_method == COMPRESS_ZLIB) {
                        /* NOTE(review): casting &size (size_t) to
                         * unsigned long * assumes both types share a
                         * representation — true on common ABIs, worth
                         * confirming for new ports. */
                        size = compress_size; 
                        ret = uncompress(buffer, (unsigned long *)&size,
                                         async->buffer, async->bufsize);
                        if (ret != Z_OK) {
                                fprintf(stderr, "Error decompressing %d\n",
                                        ret);
                                err = -EIO;
                        }
                        outbuf = buffer;
                } else {
                        outbuf = async->buffer;
                        size = async->bufsize;
                }

                /* Rewrite metadata for a single-device target: the
                 * super block gets a collapsed sys chunk array, other
                 * blocks get their chunk items reduced to one stripe. */
                if (!mdres->multi_devices) {
                        if (async->start == BTRFS_SUPER_INFO_OFFSET) {
                                if (mdres->old_restore) {
                                        update_super_old(outbuf);
                                } else {
                                        ret = update_super(outbuf);
                                        if (ret)
                                                err = ret;
                                }
                        } else if (!mdres->old_restore) {
                                ret = fixup_chunk_tree_block(mdres, async, outbuf, size);
                                if (ret)
                                        err = ret;
                        }
                }

                if (!mdres->fixup_offset) {
                        /* Write in pieces: each piece may map to a
                         * different physical chunk. */
                        while (size) {
                                u64 chunk_size = size;
                                if (!mdres->multi_devices)
                                        bytenr = logical_to_physical(mdres,
                                                                     async->start + offset,
                                                                     &chunk_size);
                                else
                                        bytenr = async->start + offset;

                                ret = pwrite64(outfd, outbuf+offset, chunk_size,
                                               bytenr);
                                if (ret != chunk_size) {
                                        if (ret < 0) {
                                                fprintf(stderr, "Error writing to "
                                                        "device %d\n", errno);
                                                err = errno;
                                                break;
                                        } else {
                                                fprintf(stderr, "Short write\n");
                                                err = -EIO;
                                                break;
                                        }
                                }
                                size -= chunk_size;
                                offset += chunk_size;
                        }
                } else if (async->start != BTRFS_SUPER_INFO_OFFSET) {
                        ret = write_data_to_disk(mdres->info, outbuf, async->start, size, 0);
                        if (ret) {
                                printk("Error write data\n");
                                exit(1);
                        }
                }


                /* backup super blocks are already there at fixup_offset stage */
                if (!mdres->multi_devices && async->start == BTRFS_SUPER_INFO_OFFSET)
                        write_backup_supers(outfd, outbuf);

                pthread_mutex_lock(&mdres->mutex);
                if (err && !mdres->error)
                        mdres->error = err;
                mdres->num_items--;
                pthread_mutex_unlock(&mdres->mutex);

                free(async->buffer);
                free(async);
        }
out:
        free(buffer);
        pthread_exit(NULL);
}
1719
1720 static void mdrestore_destroy(struct mdrestore_struct *mdres, int num_threads)
1721 {
1722         struct rb_node *n;
1723         int i;
1724
1725         while ((n = rb_first(&mdres->chunk_tree))) {
1726                 struct fs_chunk *entry;
1727
1728                 entry = rb_entry(n, struct fs_chunk, n);
1729                 rb_erase(n, &mdres->chunk_tree);
1730                 free(entry);
1731         }
1732         pthread_mutex_lock(&mdres->mutex);
1733         mdres->done = 1;
1734         pthread_cond_broadcast(&mdres->cond);
1735         pthread_mutex_unlock(&mdres->mutex);
1736
1737         for (i = 0; i < num_threads; i++)
1738                 pthread_join(mdres->threads[i], NULL);
1739
1740         pthread_cond_destroy(&mdres->cond);
1741         pthread_mutex_destroy(&mdres->mutex);
1742         free(mdres->threads);
1743 }
1744
1745 static int mdrestore_init(struct mdrestore_struct *mdres,
1746                           FILE *in, FILE *out, int old_restore,
1747                           int num_threads, int fixup_offset,
1748                           struct btrfs_fs_info *info, int multi_devices)
1749 {
1750         int i, ret = 0;
1751
1752         memset(mdres, 0, sizeof(*mdres));
1753         pthread_cond_init(&mdres->cond, NULL);
1754         pthread_mutex_init(&mdres->mutex, NULL);
1755         INIT_LIST_HEAD(&mdres->list);
1756         mdres->in = in;
1757         mdres->out = out;
1758         mdres->old_restore = old_restore;
1759         mdres->chunk_tree.rb_node = NULL;
1760         mdres->fixup_offset = fixup_offset;
1761         mdres->info = info;
1762         mdres->multi_devices = multi_devices;
1763
1764         if (!num_threads)
1765                 return 0;
1766
1767         mdres->num_threads = num_threads;
1768         mdres->threads = calloc(num_threads, sizeof(pthread_t));
1769         if (!mdres->threads)
1770                 return -ENOMEM;
1771         for (i = 0; i < num_threads; i++) {
1772                 ret = pthread_create(mdres->threads + i, NULL, restore_worker,
1773                                      mdres);
1774                 if (ret)
1775                         break;
1776         }
1777         if (ret)
1778                 mdrestore_destroy(mdres, i + 1);
1779         return ret;
1780 }
1781
/*
 * Populate the restore context fields (leafsize, fsid, device uuid and
 * devid) from the super block carried by @async, decompressing the buffer
 * first when the dump was created with zlib compression.
 *
 * Called with mdres->mutex held by add_cluster().  Only the first call
 * does any work; later calls return immediately once leafsize is set.
 *
 * Returns 0 on success, -ENOMEM or -EIO on failure.
 */
static int fill_mdres_info(struct mdrestore_struct *mdres,
			   struct async_work *async)
{
	struct btrfs_super_block *super;
	u8 *buffer = NULL;
	u8 *outbuf;
	int ret;

	/* We've already been initialized */
	if (mdres->leafsize)
		return 0;

	if (mdres->compress_method == COMPRESS_ZLIB) {
		/*
		 * uncompress() takes a uLongf (unsigned long) in/out length;
		 * use that type directly instead of casting a size_t*.
		 */
		unsigned long size = MAX_PENDING_SIZE * 2;

		buffer = malloc(MAX_PENDING_SIZE * 2);
		if (!buffer)
			return -ENOMEM;
		ret = uncompress(buffer, &size,
				 async->buffer, async->bufsize);
		if (ret != Z_OK) {
			fprintf(stderr, "Error decompressing %d\n", ret);
			free(buffer);
			return -EIO;
		}
		outbuf = buffer;
	} else {
		outbuf = async->buffer;
	}

	super = (struct btrfs_super_block *)outbuf;
	mdres->leafsize = btrfs_super_leafsize(super);
	memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
	memcpy(mdres->uuid, super->dev_item.uuid,
		       BTRFS_UUID_SIZE);
	mdres->devid = le64_to_cpu(super->dev_item.devid);
	/* free(NULL) is a no-op for the uncompressed path */
	free(buffer);
	return 0;
}
1821
/*
 * Queue every item of one on-disk cluster for restore.
 *
 * Reads each item's payload from mdres->in, wraps it in an async_work
 * and hands it to the worker pool, then consumes the padding that aligns
 * the next cluster header to BLOCK_SIZE.
 *
 * @cluster: cluster header + item index, already read by the caller
 * @mdres:   restore context whose workers consume the queued items
 * @next:    out parameter: file offset of the next cluster header
 *
 * Returns 0 on success, negative errno on failure.
 */
static int add_cluster(struct meta_cluster *cluster,
		       struct mdrestore_struct *mdres, u64 *next)
{
	struct meta_cluster_item *item;
	struct meta_cluster_header *header = &cluster->header;
	struct async_work *async;
	u64 bytenr;
	u32 i, nritems;
	int ret;

	/* The previous cluster must be fully drained before a new one. */
	BUG_ON(mdres->num_items);
	mdres->compress_method = header->compress;

	/* Item payloads start right after the BLOCK_SIZE header block. */
	bytenr = le64_to_cpu(header->bytenr) + BLOCK_SIZE;
	nritems = le32_to_cpu(header->nritems);
	for (i = 0; i < nritems; i++) {
		item = &cluster->items[i];
		async = calloc(1, sizeof(*async));
		if (!async) {
			fprintf(stderr, "Error allocating async\n");
			return -ENOMEM;
		}
		async->start = le64_to_cpu(item->bytenr);
		async->bufsize = le32_to_cpu(item->size);
		async->buffer = malloc(async->bufsize);
		if (!async->buffer) {
			fprintf(stderr, "Error allocing async buffer\n");
			free(async);
			return -ENOMEM;
		}
		ret = fread(async->buffer, async->bufsize, 1, mdres->in);
		if (ret != 1) {
			fprintf(stderr, "Error reading buffer %d\n", errno);
			free(async->buffer);
			free(async);
			return -EIO;
		}
		bytenr += async->bufsize;

		pthread_mutex_lock(&mdres->mutex);
		if (async->start == BTRFS_SUPER_INFO_OFFSET) {
			/*
			 * The super block item carries leafsize/fsid/devid;
			 * cache them (under the mutex) before workers that
			 * may need them see this item.
			 */
			ret = fill_mdres_info(mdres, async);
			if (ret) {
				fprintf(stderr, "Error setting up restore\n");
				pthread_mutex_unlock(&mdres->mutex);
				free(async->buffer);
				free(async);
				return ret;
			}
		}
		/* Publish the work item and wake one waiting worker. */
		list_add_tail(&async->list, &mdres->list);
		mdres->num_items++;
		pthread_cond_signal(&mdres->cond);
		pthread_mutex_unlock(&mdres->mutex);
	}
	if (bytenr & BLOCK_MASK) {
		/*
		 * Consume the padding up to the next BLOCK_SIZE boundary;
		 * it is at most BLOCK_MASK bytes, so the buffer suffices.
		 */
		char buffer[BLOCK_MASK];
		size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);

		bytenr += size;
		ret = fread(buffer, size, 1, mdres->in);
		if (ret != 1) {
			fprintf(stderr, "Error reading in buffer %d\n", errno);
			return -EIO;
		}
	}
	*next = bytenr;
	return 0;
}
1891
1892 static int wait_for_worker(struct mdrestore_struct *mdres)
1893 {
1894         int ret = 0;
1895
1896         pthread_mutex_lock(&mdres->mutex);
1897         ret = mdres->error;
1898         while (!ret && mdres->num_items > 0) {
1899                 struct timespec ts = {
1900                         .tv_sec = 0,
1901                         .tv_nsec = 10000000,
1902                 };
1903                 pthread_mutex_unlock(&mdres->mutex);
1904                 nanosleep(&ts, NULL);
1905                 pthread_mutex_lock(&mdres->mutex);
1906                 ret = mdres->error;
1907         }
1908         pthread_mutex_unlock(&mdres->mutex);
1909         return ret;
1910 }
1911
1912 static int read_chunk_block(struct mdrestore_struct *mdres, u8 *buffer,
1913                             u64 bytenr, u64 item_bytenr, u32 bufsize,
1914                             u64 cluster_bytenr)
1915 {
1916         struct extent_buffer *eb;
1917         int ret = 0;
1918         int i;
1919
1920         eb = alloc_dummy_eb(bytenr, mdres->leafsize);
1921         if (!eb) {
1922                 ret = -ENOMEM;
1923                 goto out;
1924         }
1925
1926         while (item_bytenr != bytenr) {
1927                 buffer += mdres->leafsize;
1928                 item_bytenr += mdres->leafsize;
1929         }
1930
1931         memcpy(eb->data, buffer, mdres->leafsize);
1932         if (btrfs_header_bytenr(eb) != bytenr) {
1933                 fprintf(stderr, "Eb bytenr doesn't match found bytenr\n");
1934                 ret = -EIO;
1935                 goto out;
1936         }
1937
1938         if (memcmp(mdres->fsid, eb->data + offsetof(struct btrfs_header, fsid),
1939                    BTRFS_FSID_SIZE)) {
1940                 fprintf(stderr, "Fsid doesn't match\n");
1941                 ret = -EIO;
1942                 goto out;
1943         }
1944
1945         if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID) {
1946                 fprintf(stderr, "Does not belong to the chunk tree\n");
1947                 ret = -EIO;
1948                 goto out;
1949         }
1950
1951         for (i = 0; i < btrfs_header_nritems(eb); i++) {
1952                 struct btrfs_chunk chunk;
1953                 struct fs_chunk *fs_chunk;
1954                 struct btrfs_key key;
1955
1956                 if (btrfs_header_level(eb)) {
1957                         u64 blockptr = btrfs_node_blockptr(eb, i);
1958
1959                         ret = search_for_chunk_blocks(mdres, blockptr,
1960                                                       cluster_bytenr);
1961                         if (ret)
1962                                 break;
1963                         continue;
1964                 }
1965
1966                 /* Yay a leaf!  We loves leafs! */
1967                 btrfs_item_key_to_cpu(eb, &key, i);
1968                 if (key.type != BTRFS_CHUNK_ITEM_KEY)
1969                         continue;
1970
1971                 fs_chunk = malloc(sizeof(struct fs_chunk));
1972                 if (!fs_chunk) {
1973                         fprintf(stderr, "Erorr allocating chunk\n");
1974                         ret = -ENOMEM;
1975                         break;
1976                 }
1977                 memset(fs_chunk, 0, sizeof(*fs_chunk));
1978                 read_extent_buffer(eb, &chunk, btrfs_item_ptr_offset(eb, i),
1979                                    sizeof(chunk));
1980
1981                 fs_chunk->logical = key.offset;
1982                 fs_chunk->physical = btrfs_stack_stripe_offset(&chunk.stripe);
1983                 fs_chunk->bytes = btrfs_stack_chunk_length(&chunk);
1984                 tree_insert(&mdres->chunk_tree, &fs_chunk->n, chunk_cmp);
1985         }
1986 out:
1987         free(eb);
1988         return ret;
1989 }
1990
/* If you have to ask you aren't worthy */
/*
 * Scan the metadump image for the cluster item containing the chunk tree
 * block at logical address @search, then parse it via read_chunk_block().
 *
 * The scan starts at file offset @cluster_bytenr; on premature EOF it
 * wraps around once to the beginning of the image before giving up.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
				   u64 search, u64 cluster_bytenr)
{
	struct meta_cluster *cluster;
	struct meta_cluster_header *header;
	struct meta_cluster_item *item;
	u64 current_cluster = cluster_bytenr, bytenr;
	u64 item_bytenr;
	u32 bufsize, nritems, i;
	u32 max_size = MAX_PENDING_SIZE * 2;
	u8 *buffer, *tmp = NULL;
	int ret = 0;

	cluster = malloc(BLOCK_SIZE);
	if (!cluster) {
		fprintf(stderr, "Error allocating cluster\n");
		return -ENOMEM;
	}

	buffer = malloc(max_size);
	if (!buffer) {
		fprintf(stderr, "Error allocing buffer\n");
		free(cluster);
		return -ENOMEM;
	}

	/* Compressed dumps need a scratch buffer for the raw item bytes. */
	if (mdres->compress_method == COMPRESS_ZLIB) {
		tmp = malloc(max_size);
		if (!tmp) {
			fprintf(stderr, "Error allocing tmp buffer\n");
			free(cluster);
			free(buffer);
			return -ENOMEM;
		}
	}

	bytenr = current_cluster;
	while (1) {
		/* Position the stream at the next cluster header. */
		if (fseek(mdres->in, current_cluster, SEEK_SET)) {
			fprintf(stderr, "Error seeking: %d\n", errno);
			ret = -EIO;
			break;
		}

		ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
		if (ret == 0) {
			/* EOF: retry once from the start of the image. */
			if (cluster_bytenr != 0) {
				cluster_bytenr = 0;
				current_cluster = 0;
				bytenr = 0;
				continue;
			}
			printf("ok this is where we screwed up?\n");
			ret = -EIO;
			break;
		} else if (ret < 0) {
			fprintf(stderr, "Error reading image\n");
			break;
		}
		ret = 0;

		header = &cluster->header;
		if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
		    le64_to_cpu(header->bytenr) != current_cluster) {
			fprintf(stderr, "bad header in metadump image\n");
			ret = -EIO;
			break;
		}

		/* Item payloads follow the BLOCK_SIZE header block. */
		bytenr += BLOCK_SIZE;
		nritems = le32_to_cpu(header->nritems);
		for (i = 0; i < nritems; i++) {
			size_t size;

			item = &cluster->items[i];
			bufsize = le32_to_cpu(item->size);
			item_bytenr = le64_to_cpu(item->bytenr);

			if (bufsize > max_size) {
				fprintf(stderr, "item %u size %u too big\n",
					i, bufsize);
				ret = -EIO;
				break;
			}

			if (mdres->compress_method == COMPRESS_ZLIB) {
				ret = fread(tmp, bufsize, 1, mdres->in);
				if (ret != 1) {
					fprintf(stderr, "Error reading: %d\n",
						errno);
					ret = -EIO;
					break;
				}

				/* size is in/out: capacity, then length. */
				size = max_size;
				ret = uncompress(buffer,
						 (unsigned long *)&size, tmp,
						 bufsize);
				if (ret != Z_OK) {
					fprintf(stderr, "Error decompressing "
						"%d\n", ret);
					ret = -EIO;
					break;
				}
			} else {
				ret = fread(buffer, bufsize, 1, mdres->in);
				if (ret != 1) {
					fprintf(stderr, "Error reading: %d\n",
						errno);
					ret = -EIO;
					break;
				}
				size = bufsize;
			}
			ret = 0;

			/* Found the item covering @search: parse it. */
			if (item_bytenr <= search &&
			    item_bytenr + size > search) {
				ret = read_chunk_block(mdres, buffer, search,
						       item_bytenr, size,
						       current_cluster);
				if (!ret)
					ret = 1;
				break;
			}
			bytenr += bufsize;
		}
		if (ret) {
			/* ret > 0 means the block was found and parsed. */
			if (ret > 0)
				ret = 0;
			break;
		}
		/* Clusters are BLOCK_SIZE aligned; skip the padding. */
		if (bytenr & BLOCK_MASK)
			bytenr += BLOCK_SIZE - (bytenr & BLOCK_MASK);
		current_cluster = bytenr;
	}

	free(tmp);
	free(buffer);
	free(cluster);
	return ret;
}
2134
/*
 * Bootstrap the logical -> physical chunk mapping before the restore
 * pass: locate the super block item in the first cluster, read the chunk
 * root address from it and scan the image for all chunk tree blocks.
 *
 * Skipped (returns 0) when reading from stdin, since the stream cannot
 * be seeked and re-read afterwards.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int build_chunk_tree(struct mdrestore_struct *mdres,
			    struct meta_cluster *cluster)
{
	struct btrfs_super_block *super;
	struct meta_cluster_header *header;
	struct meta_cluster_item *item = NULL;
	u64 chunk_root_bytenr = 0;
	u32 i, nritems;
	u64 bytenr = 0;
	u8 *buffer;
	int ret;

	/* We can't seek with stdin so don't bother doing this */
	if (mdres->in == stdin)
		return 0;

	ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
	if (ret <= 0) {
		fprintf(stderr, "Error reading in cluster: %d\n", errno);
		return -EIO;
	}
	ret = 0;

	/* The first cluster header lives at offset 0 of the image. */
	header = &cluster->header;
	if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
	    le64_to_cpu(header->bytenr) != 0) {
		fprintf(stderr, "bad header in metadump image\n");
		return -EIO;
	}

	/* Find the super block item, skipping everything before it. */
	bytenr += BLOCK_SIZE;
	mdres->compress_method = header->compress;
	nritems = le32_to_cpu(header->nritems);
	for (i = 0; i < nritems; i++) {
		item = &cluster->items[i];

		if (le64_to_cpu(item->bytenr) == BTRFS_SUPER_INFO_OFFSET)
			break;
		bytenr += le32_to_cpu(item->size);
		if (fseek(mdres->in, le32_to_cpu(item->size), SEEK_CUR)) {
			fprintf(stderr, "Error seeking: %d\n", errno);
			return -EIO;
		}
	}

	if (!item || le64_to_cpu(item->bytenr) != BTRFS_SUPER_INFO_OFFSET) {
		fprintf(stderr, "Huh, didn't find the super?\n");
		return -EINVAL;
	}

	buffer = malloc(le32_to_cpu(item->size));
	if (!buffer) {
		fprintf(stderr, "Error allocing buffer\n");
		return -ENOMEM;
	}

	ret = fread(buffer, le32_to_cpu(item->size), 1, mdres->in);
	if (ret != 1) {
		fprintf(stderr, "Error reading buffer: %d\n", errno);
		free(buffer);
		return -EIO;
	}

	/* Decompress the super block item when the dump is compressed. */
	if (mdres->compress_method == COMPRESS_ZLIB) {
		size_t size = MAX_PENDING_SIZE * 2;
		u8 *tmp;

		tmp = malloc(MAX_PENDING_SIZE * 2);
		if (!tmp) {
			free(buffer);
			return -ENOMEM;
		}
		ret = uncompress(tmp, (unsigned long *)&size,
				 buffer, le32_to_cpu(item->size));
		if (ret != Z_OK) {
			fprintf(stderr, "Error decompressing %d\n", ret);
			free(buffer);
			free(tmp);
			return -EIO;
		}
		free(buffer);
		buffer = tmp;
	}

	/* Cache the identifying fields the restore workers rely on. */
	super = (struct btrfs_super_block *)buffer;
	chunk_root_bytenr = btrfs_super_chunk_root(super);
	mdres->leafsize = btrfs_super_leafsize(super);
	memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
	memcpy(mdres->uuid, super->dev_item.uuid,
		       BTRFS_UUID_SIZE);
	mdres->devid = le64_to_cpu(super->dev_item.devid);
	free(buffer);

	return search_for_chunk_blocks(mdres, chunk_root_bytenr, 0);
}
2230
2231 static int __restore_metadump(const char *input, FILE *out, int old_restore,
2232                               int num_threads, int fixup_offset,
2233                               const char *target, int multi_devices)
2234 {
2235         struct meta_cluster *cluster = NULL;
2236         struct meta_cluster_header *header;
2237         struct mdrestore_struct mdrestore;
2238         struct btrfs_fs_info *info = NULL;
2239         u64 bytenr = 0;
2240         FILE *in = NULL;
2241         int ret = 0;
2242
2243         if (!strcmp(input, "-")) {
2244                 in = stdin;
2245         } else {
2246                 in = fopen(input, "r");
2247                 if (!in) {
2248                         perror("unable to open metadump image");
2249                         return 1;
2250                 }
2251         }
2252
2253         /* NOTE: open with write mode */
2254         if (fixup_offset) {
2255                 BUG_ON(!target);
2256                 info = open_ctree_fs_info(target, 0, 0,
2257                                           OPEN_CTREE_WRITES |
2258                                           OPEN_CTREE_RESTORE |
2259                                           OPEN_CTREE_PARTIAL);
2260                 if (!info) {
2261                         fprintf(stderr, "%s: open ctree failed\n", __func__);
2262                         ret = -EIO;
2263                         goto failed_open;
2264                 }
2265         }
2266
2267         cluster = malloc(BLOCK_SIZE);
2268         if (!cluster) {
2269                 fprintf(stderr, "Error allocating cluster\n");
2270                 ret = -ENOMEM;
2271                 goto failed_info;
2272         }
2273
2274         ret = mdrestore_init(&mdrestore, in, out, old_restore, num_threads,
2275                              fixup_offset, info, multi_devices);
2276         if (ret) {
2277                 fprintf(stderr, "Error initing mdrestore %d\n", ret);
2278                 goto failed_cluster;
2279         }
2280
2281         if (!multi_devices) {
2282                 ret = build_chunk_tree(&mdrestore, cluster);
2283                 if (ret)
2284                         goto out;
2285         }
2286
2287         if (in != stdin && fseek(in, 0, SEEK_SET)) {
2288                 fprintf(stderr, "Error seeking %d\n", errno);
2289                 goto out;
2290         }
2291
2292         while (1) {
2293                 ret = fread(cluster, BLOCK_SIZE, 1, in);
2294                 if (!ret)
2295                         break;
2296
2297                 header = &cluster->header;
2298                 if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
2299                     le64_to_cpu(header->bytenr) != bytenr) {
2300                         fprintf(stderr, "bad header in metadump image\n");
2301                         ret = -EIO;
2302                         break;
2303                 }
2304                 ret = add_cluster(cluster, &mdrestore, &bytenr);
2305                 if (ret) {
2306                         fprintf(stderr, "Error adding cluster\n");
2307                         break;
2308                 }
2309
2310                 ret = wait_for_worker(&mdrestore);
2311                 if (ret) {
2312                         fprintf(stderr, "One of the threads errored out %d\n",
2313                                 ret);
2314                         break;
2315                 }
2316         }
2317 out:
2318         mdrestore_destroy(&mdrestore, num_threads);
2319 failed_cluster:
2320         free(cluster);
2321 failed_info:
2322         if (fixup_offset && info)
2323                 close_ctree(info->chunk_root);
2324 failed_open:
2325         if (in != stdin)
2326                 fclose(in);
2327         return ret;
2328 }
2329
/* Plain restore: no offset fixups, no target filesystem required. */
static int restore_metadump(const char *input, FILE *out, int old_restore,
			    int num_threads, int multi_devices)
{
	return __restore_metadump(input, out, old_restore, num_threads,
				  0 /* fixup_offset */, NULL /* target */,
				  multi_devices);
}
2336
/* Multi-device restore with offset fixups against @target. */
static int fixup_metadump(const char *input, FILE *out, int num_threads,
			  const char *target)
{
	return __restore_metadump(input, out, 0 /* old_restore */,
				  num_threads, 1 /* fixup_offset */,
				  target, 1 /* multi_devices */);
}
2342
2343 static int update_disk_super_on_device(struct btrfs_fs_info *info,
2344                                        const char *other_dev, u64 cur_devid)
2345 {
2346         struct btrfs_key key;
2347         struct extent_buffer *leaf;
2348         struct btrfs_path path;
2349         struct btrfs_dev_item *dev_item;
2350         struct btrfs_super_block *disk_super;
2351         char dev_uuid[BTRFS_UUID_SIZE];
2352         char fs_uuid[BTRFS_UUID_SIZE];
2353         u64 devid, type, io_align, io_width;
2354         u64 sector_size, total_bytes, bytes_used;
2355         char *buf;
2356         int fp;
2357         int ret;
2358
2359         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2360         key.type = BTRFS_DEV_ITEM_KEY;
2361         key.offset = cur_devid;
2362
2363         btrfs_init_path(&path);
2364         ret = btrfs_search_slot(NULL, info->chunk_root, &key, &path, 0, 0); 
2365         if (ret) {
2366                 fprintf(stderr, "search key fails\n");
2367                 exit(1);
2368         }
2369
2370         leaf = path.nodes[0];
2371         dev_item = btrfs_item_ptr(leaf, path.slots[0],
2372                                   struct btrfs_dev_item);
2373
2374         devid = btrfs_device_id(leaf, dev_item);
2375         if (devid != cur_devid) {
2376                 printk("devid %llu mismatch with %llu\n", devid, cur_devid);
2377                 exit(1);
2378         }
2379
2380         type = btrfs_device_type(leaf, dev_item);
2381         io_align = btrfs_device_io_align(leaf, dev_item);
2382         io_width = btrfs_device_io_width(leaf, dev_item);
2383         sector_size = btrfs_device_sector_size(leaf, dev_item);
2384         total_bytes = btrfs_device_total_bytes(leaf, dev_item);
2385         bytes_used = btrfs_device_bytes_used(leaf, dev_item);
2386         read_extent_buffer(leaf, dev_uuid, (unsigned long)btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE);
2387         read_extent_buffer(leaf, fs_uuid, (unsigned long)btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE);
2388
2389         btrfs_release_path(&path);
2390
2391         printk("update disk super on %s devid=%llu\n", other_dev, devid);
2392
2393         /* update other devices' super block */
2394         fp = open(other_dev, O_CREAT | O_RDWR, 0600);
2395         if (fp < 0) {
2396                 fprintf(stderr, "could not open %s\n", other_dev);
2397                 exit(1);
2398         }
2399
2400         buf = malloc(BTRFS_SUPER_INFO_SIZE);
2401         if (!buf) {
2402                 ret = -ENOMEM;
2403                 exit(1);
2404         }
2405
2406         memcpy(buf, info->super_copy, BTRFS_SUPER_INFO_SIZE);
2407
2408         disk_super = (struct btrfs_super_block *)buf;
2409         dev_item = &disk_super->dev_item;
2410
2411         btrfs_set_stack_device_type(dev_item, type);
2412         btrfs_set_stack_device_id(dev_item, devid);
2413         btrfs_set_stack_device_total_bytes(dev_item, total_bytes);
2414         btrfs_set_stack_device_bytes_used(dev_item, bytes_used);
2415         btrfs_set_stack_device_io_align(dev_item, io_align);
2416         btrfs_set_stack_device_io_width(dev_item, io_width);
2417         btrfs_set_stack_device_sector_size(dev_item, sector_size);
2418         memcpy(dev_item->uuid, dev_uuid, BTRFS_UUID_SIZE);
2419         memcpy(dev_item->fsid, fs_uuid, BTRFS_UUID_SIZE);
2420         csum_block((u8 *)buf, BTRFS_SUPER_INFO_SIZE);
2421
2422         ret = pwrite64(fp, buf, BTRFS_SUPER_INFO_SIZE, BTRFS_SUPER_INFO_OFFSET);
2423         if (ret != BTRFS_SUPER_INFO_SIZE) {
2424                 ret = -EIO;
2425                 goto out;
2426         }
2427
2428         write_backup_supers(fp, (u8 *)buf);
2429
2430 out:
2431         free(buf);
2432         close(fp);
2433         return 0;
2434 }
2435
/* Print command line help to stderr and terminate with status 1. */
static void print_usage(void)
{
	static const char * const usage_lines[] = {
		"usage: btrfs-image [options] source target",
		"\t-r      \trestore metadump image",
		"\t-c value\tcompression level (0 ~ 9)",
		"\t-t value\tnumber of threads (1 ~ 32)",
		"\t-o      \tdon't mess with the chunk tree when restoring",
		"\t-s      \tsanitize file names, use once to just use garbage, use twice if you want crc collisions",
		"\t-w      \twalk all trees instead of using extent tree, do this if your extent tree is broken",
	};
	size_t i;

	for (i = 0; i < sizeof(usage_lines) / sizeof(usage_lines[0]); i++)
		fprintf(stderr, "%s\n", usage_lines[i]);
	exit(1);
}
2447
2448 int main(int argc, char *argv[])
2449 {
2450         char *source;
2451         char *target;
2452         u64 num_threads = 0;
2453         u64 compress_level = 0;
2454         int create = 1;
2455         int old_restore = 0;
2456         int walk_trees = 0;
2457         int multi_devices = 0;
2458         int ret;
2459         int sanitize = 0;
2460         int dev_cnt = 0;
2461         FILE *out;
2462
2463         while (1) {
2464                 int c = getopt(argc, argv, "rc:t:oswm");
2465                 if (c < 0)
2466                         break;
2467                 switch (c) {
2468                 case 'r':
2469                         create = 0;
2470                         break;
2471                 case 't':
2472                         num_threads = arg_strtou64(optarg);
2473                         if (num_threads > 32)
2474                                 print_usage();
2475                         break;
2476                 case 'c':
2477                         compress_level = arg_strtou64(optarg);
2478                         if (compress_level > 9)
2479                                 print_usage();
2480                         break;
2481                 case 'o':
2482                         old_restore = 1;
2483                         break;
2484                 case 's':
2485                         sanitize++;
2486                         break;
2487                 case 'w':
2488                         walk_trees = 1;
2489                         break;
2490                 case 'm':
2491                         create = 0;
2492                         multi_devices = 1;
2493                         break;
2494                 default:
2495                         print_usage();
2496                 }
2497         }
2498
2499         if ((old_restore) && create)
2500                 print_usage();
2501
2502         argc = argc - optind;
2503         dev_cnt = argc - 1;
2504
2505         if (multi_devices && dev_cnt < 2)
2506                 print_usage();
2507         if (!multi_devices && dev_cnt != 1)
2508                 print_usage();
2509
2510         source = argv[optind];
2511         target = argv[optind + 1];
2512
2513         if (create && !strcmp(target, "-")) {
2514                 out = stdout;
2515         } else {
2516                 out = fopen(target, "w+");
2517                 if (!out) {
2518                         perror("unable to create target file");
2519                         exit(1);
2520                 }
2521         }
2522
2523         if (num_threads == 0 && compress_level > 0) {
2524                 num_threads = sysconf(_SC_NPROCESSORS_ONLN);
2525                 if (num_threads <= 0)
2526                         num_threads = 1;
2527         }
2528
2529         if (create)
2530                 ret = create_metadump(source, out, num_threads,
2531                                       compress_level, sanitize, walk_trees);
2532         else
2533                 ret = restore_metadump(source, out, old_restore, 1,
2534                                        multi_devices);
2535         if (ret) {
2536                 printk("%s failed (%s)\n", (create) ? "create" : "restore",
2537                        strerror(errno));
2538                 goto out;
2539         }
2540
2541          /* extended support for multiple devices */
2542         if (!create && multi_devices) {
2543                 struct btrfs_fs_info *info;
2544                 u64 total_devs;
2545                 int i;
2546
2547                 info = open_ctree_fs_info(target, 0, 0,
2548                                           OPEN_CTREE_PARTIAL |
2549                                           OPEN_CTREE_RESTORE);
2550                 if (!info) {
2551                         int e = errno;
2552                         fprintf(stderr, "unable to open %s error = %s\n",
2553                                 target, strerror(e));
2554                         return 1;
2555                 }
2556
2557                 total_devs = btrfs_super_num_devices(info->super_copy);
2558                 if (total_devs != dev_cnt) {
2559                         printk("it needs %llu devices but has only %d\n",
2560                                 total_devs, dev_cnt);
2561                         close_ctree(info->chunk_root);
2562                         goto out;
2563                 }
2564
2565                 /* update super block on other disks */
2566                 for (i = 2; i <= dev_cnt; i++) {
2567                         ret = update_disk_super_on_device(info,
2568                                         argv[optind + i], (u64)i);
2569                         if (ret) {
2570                                 printk("update disk super failed devid=%d (error=%d)\n",
2571                                         i, ret);
2572                                 close_ctree(info->chunk_root);
2573                                 exit(1);
2574                         }
2575                 }
2576
2577                 close_ctree(info->chunk_root);
2578
2579                 /* fix metadata block to map correct chunk */
2580                 ret = fixup_metadump(source, out, 1, target);
2581                 if (ret) {
2582                         fprintf(stderr, "fix metadump failed (error=%d)\n",
2583                                 ret);
2584                         exit(1);
2585                 }
2586         }
2587
2588 out:
2589         if (out == stdout)
2590                 fflush(out);
2591         else
2592                 fclose(out);
2593
2594         return !!ret;
2595 }