btrfs-progs: fi defrag: change default extent target size to 32 MiB
[platform/upstream/btrfs-progs.git] / btrfs-image.c
1 /*
2  * Copyright (C) 2008 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <pthread.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <fcntl.h>
25 #include <unistd.h>
26 #include <dirent.h>
27 #include <zlib.h>
28 #include <getopt.h>
29
30 #include "kerncompat.h"
31 #include "crc32c.h"
32 #include "ctree.h"
33 #include "disk-io.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "volumes.h"
37 #include "extent_io.h"
38
/* Magic value stored in the header of every cluster (see meta_cluster_init) */
#define HEADER_MAGIC            0xbd5c25e27295668bULL
/* NOTE(review): not used in the visible part of the file; presumably caps
 * how much pending data is batched into one work item -- confirm */
#define MAX_PENDING_SIZE        (256 * 1024)
/* Size of a cluster index block; written output is padded to this size */
#define BLOCK_SIZE              1024
/* Mask for the offset of a byte position within a BLOCK_SIZE block */
#define BLOCK_MASK              (BLOCK_SIZE - 1)

/* Values stored in meta_cluster_header.compress */
#define COMPRESS_NONE           0
#define COMPRESS_ZLIB           1
46
/*
 * Index entry for one buffer stored in a cluster.  write_buffers() fills
 * bytenr from async_work.start and size from async_work.bufsize, both
 * converted to little-endian.
 */
struct meta_cluster_item {
        __le64 bytenr;
        __le32 size;
} __attribute__ ((__packed__));
51
/*
 * Header at the start of every cluster index block (little-endian fields).
 *
 * magic    - HEADER_MAGIC
 * bytenr   - start value handed to meta_cluster_init() for this cluster
 * nritems  - number of meta_cluster_item entries following the header
 * compress - COMPRESS_NONE or COMPRESS_ZLIB
 */
struct meta_cluster_header {
        __le64 magic;
        __le64 bytenr;
        __le32 nritems;
        u8 compress;
} __attribute__ ((__packed__));
58
/*
 * Index block of a cluster: the header followed by the index items.  The
 * buffers described by the items are written to the output right after the
 * BLOCK_SIZE index block (see write_buffers).
 */
struct meta_cluster {
        struct meta_cluster_header header;
        struct meta_cluster_item items[];
} __attribute__ ((__packed__));
64
/* Number of index items that fit in one BLOCK_SIZE block after the header */
#define ITEMS_PER_CLUSTER ((BLOCK_SIZE - sizeof(struct meta_cluster)) / \
                           sizeof(struct meta_cluster_item))
67
/*
 * Mapping of one chunk from its logical address range to a physical offset.
 * Linked into a tree ordered by logical address through @l (see chunk_cmp)
 * and into one ordered by physical address through @p (see physical_cmp).
 */
struct fs_chunk {
        u64 logical;
        u64 physical;
        /*
         * physical_dup only stores the additional physical address for
         * BTRFS_BLOCK_GROUP_DUP chunks; restore currently supports only
         * single and DUP.
         * TODO: modify this structure and the functions related to it
         * to support RAID*
         */
        u64 physical_dup;
        u64 bytes;
        struct rb_node l;
        struct rb_node p;
        struct list_head list;
};
83
/*
 * One unit of work: a range of the filesystem that is buffered, optionally
 * compressed by dump_worker(), and then written out by write_buffers().
 */
struct async_work {
        /* link in the queue the worker threads take work from */
        struct list_head list;
        /* link in the list write_buffers() walks to emit the buffers */
        struct list_head ordered;
        /* address recorded as meta_cluster_item.bytenr */
        u64 start;
        /* number of bytes of uncompressed input */
        u64 size;
        /* data to write; replaced by the compressed copy in dump_worker() */
        u8 *buffer;
        /* number of bytes in @buffer that get written out */
        size_t bufsize;
        /* set to 1 by dump_worker() when compress2() fails */
        int error;
};
93
/* State of a metadata dump in progress (set up by metadump_init) */
struct metadump_struct {
        struct btrfs_root *root;
        /* stream the clusters are written to */
        FILE *out;

        /* BLOCK_SIZE index block of the cluster currently being built */
        struct meta_cluster *cluster;

        /* worker threads running dump_worker() */
        pthread_t *threads;
        size_t num_threads;
        /* protects the work queue and the done/error/num_ready fields */
        pthread_mutex_t mutex;
        /* workers wait on this while the work queue is empty */
        pthread_cond_t cond;
        /* struct name entries created by find_collision() */
        struct rb_root name_tree;

        /* queue of async_work items consumed by dump_worker() */
        struct list_head list;
        /* async_work items in the order write_buffers() emits them */
        struct list_head ordered;
        /* write_buffers() waits until num_ready catches up with num_items */
        size_t num_items;
        size_t num_ready;

        /* range collected so far and not yet turned into an async_work */
        u64 pending_start;
        u64 pending_size;

        /* zlib level; values > 0 enable compression */
        int compress_level;
        /* set by metadump_destroy() to make the workers exit */
        int done;
        /* non-zero: pending ranges are read with read_data_extent() */
        int data;
        /* 0: keep names, 1: random garbage, > 1: crc32c-colliding names */
        int sanitize_names;

        /* error recorded by a worker thread (e.g. -ENOMEM), 0 if none */
        int error;
};
121
/*
 * Entry of metadump_struct.name_tree: remembers which substitute was chosen
 * for an original name so the same name is always replaced the same way.
 * Neither @val nor @sub is NUL terminated; both are @len bytes long.
 */
struct name {
        struct rb_node n;
        /* original name */
        char *val;
        /* sanitized replacement */
        char *sub;
        u32 len;
};
128
/*
 * State of a restore in progress.
 * NOTE(review): most fields are only used outside the part of the file
 * reviewed here; the comments below cover what is visible.
 */
struct mdrestore_struct {
        /* input stream; logical_to_physical() stays quiet when it is stdin */
        FILE *in;
        FILE *out;

        pthread_t *threads;
        size_t num_threads;
        pthread_mutex_t mutex;
        pthread_cond_t cond;

        /* struct fs_chunk entries looked up by logical address */
        struct rb_root chunk_tree;
        /* presumably the same chunks keyed by physical address -- confirm */
        struct rb_root physical_tree;
        struct list_head list;
        struct list_head overlapping_chunks;
        size_t num_items;
        u32 nodesize;
        u64 devid;
        u64 alloced_chunks;
        u64 last_physical_offset;
        u8 uuid[BTRFS_UUID_SIZE];
        u8 fsid[BTRFS_FSID_SIZE];

        int compress_method;
        int done;
        int error;
        int old_restore;
        int fixup_offset;
        int multi_devices;
        int clear_space_cache;
        struct btrfs_fs_info *info;
};
159
/*
 * Forward declarations of helpers that are referenced before their
 * definitions (alloc_dummy_eb() is used by sanitize_name()).
 */
static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
                                   u64 search, u64 cluster_bytenr);
static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size);
163
164 static void csum_block(u8 *buf, size_t len)
165 {
166         char result[BTRFS_CRC32_SIZE];
167         u32 crc = ~(u32)0;
168         crc = crc32c(crc, buf + BTRFS_CSUM_SIZE, len - BTRFS_CSUM_SIZE);
169         btrfs_csum_final(crc, result);
170         memcpy(buf, result, BTRFS_CRC32_SIZE);
171 }
172
173 static int has_name(struct btrfs_key *key)
174 {
175         switch (key->type) {
176         case BTRFS_DIR_ITEM_KEY:
177         case BTRFS_DIR_INDEX_KEY:
178         case BTRFS_INODE_REF_KEY:
179         case BTRFS_INODE_EXTREF_KEY:
180         case BTRFS_XATTR_ITEM_KEY:
181                 return 1;
182         default:
183                 break;
184         }
185
186         return 0;
187 }
188
189 static char *generate_garbage(u32 name_len)
190 {
191         char *buf = malloc(name_len);
192         int i;
193
194         if (!buf)
195                 return NULL;
196
197         for (i = 0; i < name_len; i++) {
198                 char c = rand_range(94) + 33;
199
200                 if (c == '/')
201                         c++;
202                 buf[i] = c;
203         }
204
205         return buf;
206 }
207
208 static int name_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
209 {
210         struct name *entry = rb_entry(a, struct name, n);
211         struct name *ins = rb_entry(b, struct name, n);
212         u32 len;
213
214         len = min(ins->len, entry->len);
215         return memcmp(ins->val, entry->val, len);
216 }
217
218 static int chunk_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
219 {
220         struct fs_chunk *entry = rb_entry(a, struct fs_chunk, l);
221         struct fs_chunk *ins = rb_entry(b, struct fs_chunk, l);
222
223         if (fuzz && ins->logical >= entry->logical &&
224             ins->logical < entry->logical + entry->bytes)
225                 return 0;
226
227         if (ins->logical < entry->logical)
228                 return -1;
229         else if (ins->logical > entry->logical)
230                 return 1;
231         return 0;
232 }
233
234 static int physical_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
235 {
236         struct fs_chunk *entry = rb_entry(a, struct fs_chunk, p);
237         struct fs_chunk *ins = rb_entry(b, struct fs_chunk, p);
238
239         if (fuzz && ins->physical >= entry->physical &&
240             ins->physical < entry->physical + entry->bytes)
241                 return 0;
242
243         if (fuzz && entry->physical >= ins->physical &&
244             entry->physical < ins->physical + ins->bytes)
245                 return 0;
246
247         if (ins->physical < entry->physical)
248                 return -1;
249         else if (ins->physical > entry->physical)
250                 return 1;
251         return 0;
252 }
253
254 static void tree_insert(struct rb_root *root, struct rb_node *ins,
255                         int (*cmp)(struct rb_node *a, struct rb_node *b,
256                                    int fuzz))
257 {
258         struct rb_node ** p = &root->rb_node;
259         struct rb_node * parent = NULL;
260         int dir;
261
262         while(*p) {
263                 parent = *p;
264
265                 dir = cmp(*p, ins, 1);
266                 if (dir < 0)
267                         p = &(*p)->rb_left;
268                 else if (dir > 0)
269                         p = &(*p)->rb_right;
270                 else
271                         BUG();
272         }
273
274         rb_link_node(ins, parent, p);
275         rb_insert_color(ins, root);
276 }
277
278 static struct rb_node *tree_search(struct rb_root *root,
279                                    struct rb_node *search,
280                                    int (*cmp)(struct rb_node *a,
281                                               struct rb_node *b, int fuzz),
282                                    int fuzz)
283 {
284         struct rb_node *n = root->rb_node;
285         int dir;
286
287         while (n) {
288                 dir = cmp(n, search, fuzz);
289                 if (dir < 0)
290                         n = n->rb_left;
291                 else if (dir > 0)
292                         n = n->rb_right;
293                 else
294                         return n;
295         }
296
297         return NULL;
298 }
299
300 static u64 logical_to_physical(struct mdrestore_struct *mdres, u64 logical,
301                                u64 *size, u64 *physical_dup)
302 {
303         struct fs_chunk *fs_chunk;
304         struct rb_node *entry;
305         struct fs_chunk search;
306         u64 offset;
307
308         if (logical == BTRFS_SUPER_INFO_OFFSET)
309                 return logical;
310
311         search.logical = logical;
312         entry = tree_search(&mdres->chunk_tree, &search.l, chunk_cmp, 1);
313         if (!entry) {
314                 if (mdres->in != stdin)
315                         printf("Couldn't find a chunk, using logical\n");
316                 return logical;
317         }
318         fs_chunk = rb_entry(entry, struct fs_chunk, l);
319         if (fs_chunk->logical > logical || fs_chunk->logical + fs_chunk->bytes < logical)
320                 BUG();
321         offset = search.logical - fs_chunk->logical;
322
323         if (physical_dup) {
324                 /* Only in dup case, physical_dup is not equal to 0 */
325                 if (fs_chunk->physical_dup)
326                         *physical_dup = fs_chunk->physical_dup + offset;
327                 else
328                         *physical_dup = 0;
329         }
330
331         *size = min(*size, fs_chunk->bytes + fs_chunk->logical - logical);
332         return fs_chunk->physical + offset;
333 }
334
335
/*
 * Return a substitute for @name that has the same crc32c (seeded with ~1)
 * but different contents, so that the sanitized name still matches hashes
 * computed from the original.
 *
 * @md:       dump state; previously generated substitutes are cached in
 *            md->name_tree and reused
 * @name:     malloc'ed copy of the original name (not NUL terminated).
 *            Ownership passes to this function: it is freed on a cache hit
 *            or on error, otherwise it is kept in the tree.
 * @name_len: length of @name in bytes
 *
 * Returns a buffer of @name_len bytes owned by the tree entry (it is freed
 * in metadump_destroy(), the caller must not free it), or NULL if memory
 * could not be allocated.
 *
 * NOTE(review): for name_len == 0 the search loop reads val->sub[0] of a
 * zero-sized allocation -- confirm callers never pass empty names.
 */
static char *find_collision(struct metadump_struct *md, char *name,
                            u32 name_len)
{
        struct name *val;
        struct rb_node *entry;
        struct name tmp;
        unsigned long checksum;
        int found = 0;
        int i;

        /* reuse the substitute if this name has been seen before */
        tmp.val = name;
        tmp.len = name_len;
        entry = tree_search(&md->name_tree, &tmp.n, name_cmp, 0);
        if (entry) {
                val = rb_entry(entry, struct name, n);
                free(name);
                return val->sub;
        }

        val = malloc(sizeof(struct name));
        if (!val) {
                fprintf(stderr, "Couldn't sanitize name, enomem\n");
                free(name);
                return NULL;
        }

        memset(val, 0, sizeof(*val));

        val->val = name;
        val->len = name_len;
        val->sub = malloc(name_len);
        if (!val->sub) {
                fprintf(stderr, "Couldn't sanitize name, enomem\n");
                free(val);
                free(name);
                return NULL;
        }

        /*
         * Brute force: start with all spaces and count upwards like an
         * odometer whose lowest digit is sub[0].  Each digit runs from ' '
         * to 127 and skips '/'.  i is always 0 when an iteration starts.
         */
        checksum = crc32c(~1, val->val, name_len);
        memset(val->sub, ' ', name_len);
        i = 0;
        while (1) {
                /* same checksum but not the original name itself */
                if (crc32c(~1, val->sub, name_len) == checksum &&
                    memcmp(val->sub, val->val, val->len)) {
                        found = 1;
                        break;
                }

                if (val->sub[i] == 127) {
                        /* carry: find the first digit that is not maxed out */
                        do {
                                i++;
                                if (i >= name_len)
                                        break;
                        } while (val->sub[i] == 127);

                        /* every digit is at its maximum, search exhausted */
                        if (i >= name_len)
                                break;
                        val->sub[i]++;
                        if (val->sub[i] == '/')
                                val->sub[i]++;
                        /* reset all lower digits and start over from sub[0] */
                        memset(val->sub, ' ', i);
                        i = 0;
                        continue;
                } else {
                        val->sub[i]++;
                        if (val->sub[i] == '/')
                                val->sub[i]++;
                }
        }

        if (!found) {
                /* same random fill as generate_garbage(), done in place */
                fprintf(stderr, "Couldn't find a collision for '%.*s', "
                        "generating normal garbage, it won't match indexes\n",
                        val->len, val->val);
                for (i = 0; i < name_len; i++) {
                        char c = rand_range(94) + 33;

                        if (c == '/')
                                c++;
                        val->sub[i] = c;
                }
        }

        tree_insert(&md->name_tree, &val->n, name_cmp);
        return val->sub;
}
422
423 static void sanitize_dir_item(struct metadump_struct *md, struct extent_buffer *eb,
424                               int slot)
425 {
426         struct btrfs_dir_item *dir_item;
427         char *buf;
428         char *garbage;
429         unsigned long name_ptr;
430         u32 total_len;
431         u32 cur = 0;
432         u32 this_len;
433         u32 name_len;
434         int free_garbage = (md->sanitize_names == 1);
435
436         dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
437         total_len = btrfs_item_size_nr(eb, slot);
438         while (cur < total_len) {
439                 this_len = sizeof(*dir_item) +
440                         btrfs_dir_name_len(eb, dir_item) +
441                         btrfs_dir_data_len(eb, dir_item);
442                 name_ptr = (unsigned long)(dir_item + 1);
443                 name_len = btrfs_dir_name_len(eb, dir_item);
444
445                 if (md->sanitize_names > 1) {
446                         buf = malloc(name_len);
447                         if (!buf) {
448                                 fprintf(stderr, "Couldn't sanitize name, "
449                                         "enomem\n");
450                                 return;
451                         }
452                         read_extent_buffer(eb, buf, name_ptr, name_len);
453                         garbage = find_collision(md, buf, name_len);
454                 } else {
455                         garbage = generate_garbage(name_len);
456                 }
457                 if (!garbage) {
458                         fprintf(stderr, "Couldn't sanitize name, enomem\n");
459                         return;
460                 }
461                 write_extent_buffer(eb, garbage, name_ptr, name_len);
462                 cur += this_len;
463                 dir_item = (struct btrfs_dir_item *)((char *)dir_item +
464                                                      this_len);
465                 if (free_garbage)
466                         free(garbage);
467         }
468 }
469
470 static void sanitize_inode_ref(struct metadump_struct *md,
471                                struct extent_buffer *eb, int slot, int ext)
472 {
473         struct btrfs_inode_extref *extref;
474         struct btrfs_inode_ref *ref;
475         char *garbage, *buf;
476         unsigned long ptr;
477         unsigned long name_ptr;
478         u32 item_size;
479         u32 cur_offset = 0;
480         int len;
481         int free_garbage = (md->sanitize_names == 1);
482
483         item_size = btrfs_item_size_nr(eb, slot);
484         ptr = btrfs_item_ptr_offset(eb, slot);
485         while (cur_offset < item_size) {
486                 if (ext) {
487                         extref = (struct btrfs_inode_extref *)(ptr +
488                                                                cur_offset);
489                         name_ptr = (unsigned long)(&extref->name);
490                         len = btrfs_inode_extref_name_len(eb, extref);
491                         cur_offset += sizeof(*extref);
492                 } else {
493                         ref = (struct btrfs_inode_ref *)(ptr + cur_offset);
494                         len = btrfs_inode_ref_name_len(eb, ref);
495                         name_ptr = (unsigned long)(ref + 1);
496                         cur_offset += sizeof(*ref);
497                 }
498                 cur_offset += len;
499
500                 if (md->sanitize_names > 1) {
501                         buf = malloc(len);
502                         if (!buf) {
503                                 fprintf(stderr, "Couldn't sanitize name, "
504                                         "enomem\n");
505                                 return;
506                         }
507                         read_extent_buffer(eb, buf, name_ptr, len);
508                         garbage = find_collision(md, buf, len);
509                 } else {
510                         garbage = generate_garbage(len);
511                 }
512
513                 if (!garbage) {
514                         fprintf(stderr, "Couldn't sanitize name, enomem\n");
515                         return;
516                 }
517                 write_extent_buffer(eb, garbage, name_ptr, len);
518                 if (free_garbage)
519                         free(garbage);
520         }
521 }
522
523 static void sanitize_xattr(struct metadump_struct *md,
524                            struct extent_buffer *eb, int slot)
525 {
526         struct btrfs_dir_item *dir_item;
527         unsigned long data_ptr;
528         u32 data_len;
529
530         dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
531         data_len = btrfs_dir_data_len(eb, dir_item);
532
533         data_ptr = (unsigned long)((char *)(dir_item + 1) +
534                                    btrfs_dir_name_len(eb, dir_item));
535         memset_extent_buffer(eb, 0, data_ptr, data_len);
536 }
537
538 static void sanitize_name(struct metadump_struct *md, u8 *dst,
539                           struct extent_buffer *src, struct btrfs_key *key,
540                           int slot)
541 {
542         struct extent_buffer *eb;
543
544         eb = alloc_dummy_eb(src->start, src->len);
545         if (!eb) {
546                 fprintf(stderr, "Couldn't sanitize name, no memory\n");
547                 return;
548         }
549
550         memcpy(eb->data, dst, eb->len);
551
552         switch (key->type) {
553         case BTRFS_DIR_ITEM_KEY:
554         case BTRFS_DIR_INDEX_KEY:
555                 sanitize_dir_item(md, eb, slot);
556                 break;
557         case BTRFS_INODE_REF_KEY:
558                 sanitize_inode_ref(md, eb, slot, 0);
559                 break;
560         case BTRFS_INODE_EXTREF_KEY:
561                 sanitize_inode_ref(md, eb, slot, 1);
562                 break;
563         case BTRFS_XATTR_ITEM_KEY:
564                 sanitize_xattr(md, eb, slot);
565                 break;
566         default:
567                 break;
568         }
569
570         memcpy(dst, eb->data, eb->len);
571         free(eb);
572 }
573
574 /*
575  * zero inline extents and csum items
576  */
577 static void zero_items(struct metadump_struct *md, u8 *dst,
578                        struct extent_buffer *src)
579 {
580         struct btrfs_file_extent_item *fi;
581         struct btrfs_item *item;
582         struct btrfs_key key;
583         u32 nritems = btrfs_header_nritems(src);
584         size_t size;
585         unsigned long ptr;
586         int i, extent_type;
587
588         for (i = 0; i < nritems; i++) {
589                 item = btrfs_item_nr(i);
590                 btrfs_item_key_to_cpu(src, &key, i);
591                 if (key.type == BTRFS_CSUM_ITEM_KEY) {
592                         size = btrfs_item_size_nr(src, i);
593                         memset(dst + btrfs_leaf_data(src) +
594                                btrfs_item_offset_nr(src, i), 0, size);
595                         continue;
596                 }
597
598                 if (md->sanitize_names && has_name(&key)) {
599                         sanitize_name(md, dst, src, &key, i);
600                         continue;
601                 }
602
603                 if (key.type != BTRFS_EXTENT_DATA_KEY)
604                         continue;
605
606                 fi = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
607                 extent_type = btrfs_file_extent_type(src, fi);
608                 if (extent_type != BTRFS_FILE_EXTENT_INLINE)
609                         continue;
610
611                 ptr = btrfs_file_extent_inline_start(fi);
612                 size = btrfs_file_extent_inline_item_len(src, item);
613                 memset(dst + ptr, 0, size);
614         }
615 }
616
617 /*
618  * copy buffer and zero useless data in the buffer
619  */
620 static void copy_buffer(struct metadump_struct *md, u8 *dst,
621                         struct extent_buffer *src)
622 {
623         int level;
624         size_t size;
625         u32 nritems;
626
627         memcpy(dst, src->data, src->len);
628         if (src->start == BTRFS_SUPER_INFO_OFFSET)
629                 return;
630
631         level = btrfs_header_level(src);
632         nritems = btrfs_header_nritems(src);
633
634         if (nritems == 0) {
635                 size = sizeof(struct btrfs_header);
636                 memset(dst + size, 0, src->len - size);
637         } else if (level == 0) {
638                 size = btrfs_leaf_data(src) +
639                         btrfs_item_offset_nr(src, nritems - 1) -
640                         btrfs_item_nr_offset(nritems);
641                 memset(dst + btrfs_item_nr_offset(nritems), 0, size);
642                 zero_items(md, dst, src);
643         } else {
644                 size = offsetof(struct btrfs_node, ptrs) +
645                         sizeof(struct btrfs_key_ptr) * nritems;
646                 memset(dst + size, 0, src->len - size);
647         }
648         csum_block(dst, src->len);
649 }
650
651 static void *dump_worker(void *data)
652 {
653         struct metadump_struct *md = (struct metadump_struct *)data;
654         struct async_work *async;
655         int ret;
656
657         while (1) {
658                 pthread_mutex_lock(&md->mutex);
659                 while (list_empty(&md->list)) {
660                         if (md->done) {
661                                 pthread_mutex_unlock(&md->mutex);
662                                 goto out;
663                         }
664                         pthread_cond_wait(&md->cond, &md->mutex);
665                 }
666                 async = list_entry(md->list.next, struct async_work, list);
667                 list_del_init(&async->list);
668                 pthread_mutex_unlock(&md->mutex);
669
670                 if (md->compress_level > 0) {
671                         u8 *orig = async->buffer;
672
673                         async->bufsize = compressBound(async->size);
674                         async->buffer = malloc(async->bufsize);
675                         if (!async->buffer) {
676                                 fprintf(stderr, "Error allocating buffer\n");
677                                 pthread_mutex_lock(&md->mutex);
678                                 if (!md->error)
679                                         md->error = -ENOMEM;
680                                 pthread_mutex_unlock(&md->mutex);
681                                 pthread_exit(NULL);
682                         }
683
684                         ret = compress2(async->buffer,
685                                          (unsigned long *)&async->bufsize,
686                                          orig, async->size, md->compress_level);
687
688                         if (ret != Z_OK)
689                                 async->error = 1;
690
691                         free(orig);
692                 }
693
694                 pthread_mutex_lock(&md->mutex);
695                 md->num_ready++;
696                 pthread_mutex_unlock(&md->mutex);
697         }
698 out:
699         pthread_exit(NULL);
700 }
701
702 static void meta_cluster_init(struct metadump_struct *md, u64 start)
703 {
704         struct meta_cluster_header *header;
705
706         md->num_items = 0;
707         md->num_ready = 0;
708         header = &md->cluster->header;
709         header->magic = cpu_to_le64(HEADER_MAGIC);
710         header->bytenr = cpu_to_le64(start);
711         header->nritems = cpu_to_le32(0);
712         header->compress = md->compress_level > 0 ?
713                            COMPRESS_ZLIB : COMPRESS_NONE;
714 }
715
716 static void metadump_destroy(struct metadump_struct *md, int num_threads)
717 {
718         int i;
719         struct rb_node *n;
720
721         pthread_mutex_lock(&md->mutex);
722         md->done = 1;
723         pthread_cond_broadcast(&md->cond);
724         pthread_mutex_unlock(&md->mutex);
725
726         for (i = 0; i < num_threads; i++)
727                 pthread_join(md->threads[i], NULL);
728
729         pthread_cond_destroy(&md->cond);
730         pthread_mutex_destroy(&md->mutex);
731
732         while ((n = rb_first(&md->name_tree))) {
733                 struct name *name;
734
735                 name = rb_entry(n, struct name, n);
736                 rb_erase(n, &md->name_tree);
737                 free(name->val);
738                 free(name->sub);
739                 free(name);
740         }
741         free(md->threads);
742         free(md->cluster);
743 }
744
745 static int metadump_init(struct metadump_struct *md, struct btrfs_root *root,
746                          FILE *out, int num_threads, int compress_level,
747                          int sanitize_names)
748 {
749         int i, ret = 0;
750
751         memset(md, 0, sizeof(*md));
752         md->cluster = calloc(1, BLOCK_SIZE);
753         if (!md->cluster)
754                 return -ENOMEM;
755         md->threads = calloc(num_threads, sizeof(pthread_t));
756         if (!md->threads) {
757                 free(md->cluster);
758                 return -ENOMEM;
759         }
760         INIT_LIST_HEAD(&md->list);
761         INIT_LIST_HEAD(&md->ordered);
762         md->root = root;
763         md->out = out;
764         md->pending_start = (u64)-1;
765         md->compress_level = compress_level;
766         md->sanitize_names = sanitize_names;
767         if (sanitize_names > 1)
768                 crc32c_optimization_init();
769
770         md->name_tree.rb_node = NULL;
771         md->num_threads = num_threads;
772         pthread_cond_init(&md->cond, NULL);
773         pthread_mutex_init(&md->mutex, NULL);
774         meta_cluster_init(md, 0);
775
776         if (!num_threads)
777                 return 0;
778
779         for (i = 0; i < num_threads; i++) {
780                 ret = pthread_create(md->threads + i, NULL, dump_worker, md);
781                 if (ret)
782                         break;
783         }
784
785         if (ret)
786                 metadump_destroy(md, i + 1);
787
788         return ret;
789 }
790
791 static int write_zero(FILE *out, size_t size)
792 {
793         static char zero[BLOCK_SIZE];
794         return fwrite(zero, size, 1, out);
795 }
796
797 static int write_buffers(struct metadump_struct *md, u64 *next)
798 {
799         struct meta_cluster_header *header = &md->cluster->header;
800         struct meta_cluster_item *item;
801         struct async_work *async;
802         u64 bytenr = 0;
803         u32 nritems = 0;
804         int ret;
805         int err = 0;
806
807         if (list_empty(&md->ordered))
808                 goto out;
809
810         /* wait until all buffers are compressed */
811         while (!err && md->num_items > md->num_ready) {
812                 struct timespec ts = {
813                         .tv_sec = 0,
814                         .tv_nsec = 10000000,
815                 };
816                 pthread_mutex_unlock(&md->mutex);
817                 nanosleep(&ts, NULL);
818                 pthread_mutex_lock(&md->mutex);
819                 err = md->error;
820         }
821
822         if (err) {
823                 fprintf(stderr, "One of the threads errored out %s\n",
824                                 strerror(err));
825                 goto out;
826         }
827
828         /* setup and write index block */
829         list_for_each_entry(async, &md->ordered, ordered) {
830                 item = md->cluster->items + nritems;
831                 item->bytenr = cpu_to_le64(async->start);
832                 item->size = cpu_to_le32(async->bufsize);
833                 nritems++;
834         }
835         header->nritems = cpu_to_le32(nritems);
836
837         ret = fwrite(md->cluster, BLOCK_SIZE, 1, md->out);
838         if (ret != 1) {
839                 fprintf(stderr, "Error writing out cluster: %d\n", errno);
840                 return -EIO;
841         }
842
843         /* write buffers */
844         bytenr += le64_to_cpu(header->bytenr) + BLOCK_SIZE;
845         while (!list_empty(&md->ordered)) {
846                 async = list_entry(md->ordered.next, struct async_work,
847                                    ordered);
848                 list_del_init(&async->ordered);
849
850                 bytenr += async->bufsize;
851                 if (!err)
852                         ret = fwrite(async->buffer, async->bufsize, 1,
853                                      md->out);
854                 if (ret != 1) {
855                         err = -EIO;
856                         ret = 0;
857                         fprintf(stderr, "Error writing out cluster: %d\n",
858                                 errno);
859                 }
860
861                 free(async->buffer);
862                 free(async);
863         }
864
865         /* zero unused space in the last block */
866         if (!err && bytenr & BLOCK_MASK) {
867                 size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);
868
869                 bytenr += size;
870                 ret = write_zero(md->out, size);
871                 if (ret != 1) {
872                         fprintf(stderr, "Error zeroing out buffer: %d\n",
873                                 errno);
874                         err = -EIO;
875                 }
876         }
877 out:
878         *next = bytenr;
879         return err;
880 }
881
882 static int read_data_extent(struct metadump_struct *md,
883                             struct async_work *async)
884 {
885         struct btrfs_root *root = md->root;
886         u64 bytes_left = async->size;
887         u64 logical = async->start;
888         u64 offset = 0;
889         u64 read_len;
890         int num_copies;
891         int cur_mirror;
892         int ret;
893
894         num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, logical,
895                                       bytes_left);
896
897         /* Try our best to read data, just like read_tree_block() */
898         for (cur_mirror = 0; cur_mirror < num_copies; cur_mirror++) {
899                 while (bytes_left) {
900                         read_len = bytes_left;
901                         ret = read_extent_data(root,
902                                         (char *)(async->buffer + offset),
903                                         logical, &read_len, cur_mirror);
904                         if (ret < 0)
905                                 break;
906                         offset += read_len;
907                         logical += read_len;
908                         bytes_left -= read_len;
909                 }
910         }
911         if (bytes_left)
912                 return -EIO;
913         return 0;
914 }
915
916 static int get_dev_fd(struct btrfs_root *root)
917 {
918         struct btrfs_device *dev;
919
920         dev = list_first_entry(&root->fs_info->fs_devices->devices,
921                                struct btrfs_device, dev_list);
922         return dev->fd;
923 }
924
925 static int flush_pending(struct metadump_struct *md, int done)
926 {
927         struct async_work *async = NULL;
928         struct extent_buffer *eb;
929         u64 blocksize = md->root->nodesize;
930         u64 start;
931         u64 size;
932         size_t offset;
933         int ret = 0;
934
935         if (md->pending_size) {
936                 async = calloc(1, sizeof(*async));
937                 if (!async)
938                         return -ENOMEM;
939
940                 async->start = md->pending_start;
941                 async->size = md->pending_size;
942                 async->bufsize = async->size;
943                 async->buffer = malloc(async->bufsize);
944                 if (!async->buffer) {
945                         free(async);
946                         return -ENOMEM;
947                 }
948                 offset = 0;
949                 start = async->start;
950                 size = async->size;
951
952                 if (md->data) {
953                         ret = read_data_extent(md, async);
954                         if (ret) {
955                                 free(async->buffer);
956                                 free(async);
957                                 return ret;
958                         }
959                 }
960
961                 /*
962                  * Balance can make the mapping not cover the super block, so
963                  * just copy directly from one of the devices.
964                  */
965                 if (start == BTRFS_SUPER_INFO_OFFSET) {
966                         int fd = get_dev_fd(md->root);
967
968                         ret = pread64(fd, async->buffer, size, start);
969                         if (ret < size) {
970                                 free(async->buffer);
971                                 free(async);
972                                 fprintf(stderr, "Error reading superblock\n");
973                                 return -EIO;
974                         }
975                         size = 0;
976                         ret = 0;
977                 }
978
979                 while (!md->data && size > 0) {
980                         u64 this_read = min(blocksize, size);
981                         eb = read_tree_block(md->root, start, this_read, 0);
982                         if (!extent_buffer_uptodate(eb)) {
983                                 free(async->buffer);
984                                 free(async);
985                                 fprintf(stderr,
986                                         "Error reading metadata block\n");
987                                 return -EIO;
988                         }
989                         copy_buffer(md, async->buffer + offset, eb);
990                         free_extent_buffer(eb);
991                         start += this_read;
992                         offset += this_read;
993                         size -= this_read;
994                 }
995
996                 md->pending_start = (u64)-1;
997                 md->pending_size = 0;
998         } else if (!done) {
999                 return 0;
1000         }
1001
1002         pthread_mutex_lock(&md->mutex);
1003         if (async) {
1004                 list_add_tail(&async->ordered, &md->ordered);
1005                 md->num_items++;
1006                 if (md->compress_level > 0) {
1007                         list_add_tail(&async->list, &md->list);
1008                         pthread_cond_signal(&md->cond);
1009                 } else {
1010                         md->num_ready++;
1011                 }
1012         }
1013         if (md->num_items >= ITEMS_PER_CLUSTER || done) {
1014                 ret = write_buffers(md, &start);
1015                 if (ret)
1016                         fprintf(stderr, "Error writing buffers %d\n",
1017                                 errno);
1018                 else
1019                         meta_cluster_init(md, start);
1020         }
1021         pthread_mutex_unlock(&md->mutex);
1022         return ret;
1023 }
1024
1025 static int add_extent(u64 start, u64 size, struct metadump_struct *md,
1026                       int data)
1027 {
1028         int ret;
1029         if (md->data != data ||
1030             md->pending_size + size > MAX_PENDING_SIZE ||
1031             md->pending_start + md->pending_size != start) {
1032                 ret = flush_pending(md, 0);
1033                 if (ret)
1034                         return ret;
1035                 md->pending_start = start;
1036         }
1037         readahead_tree_block(md->root, start, size, 0);
1038         md->pending_size += size;
1039         md->data = data;
1040         return 0;
1041 }
1042
1043 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1044 static int is_tree_block(struct btrfs_root *extent_root,
1045                          struct btrfs_path *path, u64 bytenr)
1046 {
1047         struct extent_buffer *leaf;
1048         struct btrfs_key key;
1049         u64 ref_objectid;
1050         int ret;
1051
1052         leaf = path->nodes[0];
1053         while (1) {
1054                 struct btrfs_extent_ref_v0 *ref_item;
1055                 path->slots[0]++;
1056                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1057                         ret = btrfs_next_leaf(extent_root, path);
1058                         if (ret < 0)
1059                                 return ret;
1060                         if (ret > 0)
1061                                 break;
1062                         leaf = path->nodes[0];
1063                 }
1064                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1065                 if (key.objectid != bytenr)
1066                         break;
1067                 if (key.type != BTRFS_EXTENT_REF_V0_KEY)
1068                         continue;
1069                 ref_item = btrfs_item_ptr(leaf, path->slots[0],
1070                                           struct btrfs_extent_ref_v0);
1071                 ref_objectid = btrfs_ref_objectid_v0(leaf, ref_item);
1072                 if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID)
1073                         return 1;
1074                 break;
1075         }
1076         return 0;
1077 }
1078 #endif
1079
1080 static int copy_tree_blocks(struct btrfs_root *root, struct extent_buffer *eb,
1081                             struct metadump_struct *metadump, int root_tree)
1082 {
1083         struct extent_buffer *tmp;
1084         struct btrfs_root_item *ri;
1085         struct btrfs_key key;
1086         u64 bytenr;
1087         int level;
1088         int nritems = 0;
1089         int i = 0;
1090         int ret;
1091
1092         ret = add_extent(btrfs_header_bytenr(eb), root->nodesize, metadump, 0);
1093         if (ret) {
1094                 fprintf(stderr, "Error adding metadata block\n");
1095                 return ret;
1096         }
1097
1098         if (btrfs_header_level(eb) == 0 && !root_tree)
1099                 return 0;
1100
1101         level = btrfs_header_level(eb);
1102         nritems = btrfs_header_nritems(eb);
1103         for (i = 0; i < nritems; i++) {
1104                 if (level == 0) {
1105                         btrfs_item_key_to_cpu(eb, &key, i);
1106                         if (key.type != BTRFS_ROOT_ITEM_KEY)
1107                                 continue;
1108                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
1109                         bytenr = btrfs_disk_root_bytenr(eb, ri);
1110                         tmp = read_tree_block(root, bytenr, root->nodesize, 0);
1111                         if (!extent_buffer_uptodate(tmp)) {
1112                                 fprintf(stderr,
1113                                         "Error reading log root block\n");
1114                                 return -EIO;
1115                         }
1116                         ret = copy_tree_blocks(root, tmp, metadump, 0);
1117                         free_extent_buffer(tmp);
1118                         if (ret)
1119                                 return ret;
1120                 } else {
1121                         bytenr = btrfs_node_blockptr(eb, i);
1122                         tmp = read_tree_block(root, bytenr, root->nodesize, 0);
1123                         if (!extent_buffer_uptodate(tmp)) {
1124                                 fprintf(stderr, "Error reading log block\n");
1125                                 return -EIO;
1126                         }
1127                         ret = copy_tree_blocks(root, tmp, metadump, root_tree);
1128                         free_extent_buffer(tmp);
1129                         if (ret)
1130                                 return ret;
1131                 }
1132         }
1133
1134         return 0;
1135 }
1136
1137 static int copy_log_trees(struct btrfs_root *root,
1138                           struct metadump_struct *metadump,
1139                           struct btrfs_path *path)
1140 {
1141         u64 blocknr = btrfs_super_log_root(root->fs_info->super_copy);
1142
1143         if (blocknr == 0)
1144                 return 0;
1145
1146         if (!root->fs_info->log_root_tree ||
1147             !root->fs_info->log_root_tree->node) {
1148                 fprintf(stderr, "Error copying tree log, it wasn't setup\n");
1149                 return -EIO;
1150         }
1151
1152         return copy_tree_blocks(root, root->fs_info->log_root_tree->node,
1153                                 metadump, 1);
1154 }
1155
1156 static int copy_space_cache(struct btrfs_root *root,
1157                             struct metadump_struct *metadump,
1158                             struct btrfs_path *path)
1159 {
1160         struct extent_buffer *leaf;
1161         struct btrfs_file_extent_item *fi;
1162         struct btrfs_key key;
1163         u64 bytenr, num_bytes;
1164         int ret;
1165
1166         root = root->fs_info->tree_root;
1167
1168         key.objectid = 0;
1169         key.type = BTRFS_EXTENT_DATA_KEY;
1170         key.offset = 0;
1171
1172         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1173         if (ret < 0) {
1174                 fprintf(stderr, "Error searching for free space inode %d\n",
1175                         ret);
1176                 return ret;
1177         }
1178
1179         leaf = path->nodes[0];
1180
1181         while (1) {
1182                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1183                         ret = btrfs_next_leaf(root, path);
1184                         if (ret < 0) {
1185                                 fprintf(stderr, "Error going to next leaf "
1186                                         "%d\n", ret);
1187                                 return ret;
1188                         }
1189                         if (ret > 0)
1190                                 break;
1191                         leaf = path->nodes[0];
1192                 }
1193
1194                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1195                 if (key.type != BTRFS_EXTENT_DATA_KEY) {
1196                         path->slots[0]++;
1197                         continue;
1198                 }
1199
1200                 fi = btrfs_item_ptr(leaf, path->slots[0],
1201                                     struct btrfs_file_extent_item);
1202                 if (btrfs_file_extent_type(leaf, fi) !=
1203                     BTRFS_FILE_EXTENT_REG) {
1204                         path->slots[0]++;
1205                         continue;
1206                 }
1207
1208                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1209                 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
1210                 ret = add_extent(bytenr, num_bytes, metadump, 1);
1211                 if (ret) {
1212                         fprintf(stderr, "Error adding space cache blocks %d\n",
1213                                 ret);
1214                         btrfs_release_path(path);
1215                         return ret;
1216                 }
1217                 path->slots[0]++;
1218         }
1219
1220         return 0;
1221 }
1222
1223 static int copy_from_extent_tree(struct metadump_struct *metadump,
1224                                  struct btrfs_path *path)
1225 {
1226         struct btrfs_root *extent_root;
1227         struct extent_buffer *leaf;
1228         struct btrfs_extent_item *ei;
1229         struct btrfs_key key;
1230         u64 bytenr;
1231         u64 num_bytes;
1232         int ret;
1233
1234         extent_root = metadump->root->fs_info->extent_root;
1235         bytenr = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
1236         key.objectid = bytenr;
1237         key.type = BTRFS_EXTENT_ITEM_KEY;
1238         key.offset = 0;
1239
1240         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1241         if (ret < 0) {
1242                 fprintf(stderr, "Error searching extent root %d\n", ret);
1243                 return ret;
1244         }
1245         ret = 0;
1246
1247         leaf = path->nodes[0];
1248
1249         while (1) {
1250                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1251                         ret = btrfs_next_leaf(extent_root, path);
1252                         if (ret < 0) {
1253                                 fprintf(stderr, "Error going to next leaf %d"
1254                                         "\n", ret);
1255                                 break;
1256                         }
1257                         if (ret > 0) {
1258                                 ret = 0;
1259                                 break;
1260                         }
1261                         leaf = path->nodes[0];
1262                 }
1263
1264                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1265                 if (key.objectid < bytenr ||
1266                     (key.type != BTRFS_EXTENT_ITEM_KEY &&
1267                      key.type != BTRFS_METADATA_ITEM_KEY)) {
1268                         path->slots[0]++;
1269                         continue;
1270                 }
1271
1272                 bytenr = key.objectid;
1273                 if (key.type == BTRFS_METADATA_ITEM_KEY)
1274                         num_bytes = extent_root->nodesize;
1275                 else
1276                         num_bytes = key.offset;
1277
1278                 if (btrfs_item_size_nr(leaf, path->slots[0]) > sizeof(*ei)) {
1279                         ei = btrfs_item_ptr(leaf, path->slots[0],
1280                                             struct btrfs_extent_item);
1281                         if (btrfs_extent_flags(leaf, ei) &
1282                             BTRFS_EXTENT_FLAG_TREE_BLOCK) {
1283                                 ret = add_extent(bytenr, num_bytes, metadump,
1284                                                  0);
1285                                 if (ret) {
1286                                         fprintf(stderr, "Error adding block "
1287                                                 "%d\n", ret);
1288                                         break;
1289                                 }
1290                         }
1291                 } else {
1292 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1293                         ret = is_tree_block(extent_root, path, bytenr);
1294                         if (ret < 0) {
1295                                 fprintf(stderr, "Error checking tree block "
1296                                         "%d\n", ret);
1297                                 break;
1298                         }
1299
1300                         if (ret) {
1301                                 ret = add_extent(bytenr, num_bytes, metadump,
1302                                                  0);
1303                                 if (ret) {
1304                                         fprintf(stderr, "Error adding block "
1305                                                 "%d\n", ret);
1306                                         break;
1307                                 }
1308                         }
1309                         ret = 0;
1310 #else
1311                         fprintf(stderr, "Either extent tree corruption or "
1312                                 "you haven't built with V0 support\n");
1313                         ret = -EIO;
1314                         break;
1315 #endif
1316                 }
1317                 bytenr += num_bytes;
1318         }
1319
1320         btrfs_release_path(path);
1321
1322         return ret;
1323 }
1324
1325 static int create_metadump(const char *input, FILE *out, int num_threads,
1326                            int compress_level, int sanitize, int walk_trees)
1327 {
1328         struct btrfs_root *root;
1329         struct btrfs_path *path = NULL;
1330         struct metadump_struct metadump;
1331         int ret;
1332         int err = 0;
1333
1334         root = open_ctree(input, 0, 0);
1335         if (!root) {
1336                 fprintf(stderr, "Open ctree failed\n");
1337                 return -EIO;
1338         }
1339
1340         ret = metadump_init(&metadump, root, out, num_threads,
1341                             compress_level, sanitize);
1342         if (ret) {
1343                 fprintf(stderr, "Error initializing metadump %d\n", ret);
1344                 close_ctree(root);
1345                 return ret;
1346         }
1347
1348         ret = add_extent(BTRFS_SUPER_INFO_OFFSET, BTRFS_SUPER_INFO_SIZE,
1349                         &metadump, 0);
1350         if (ret) {
1351                 fprintf(stderr, "Error adding metadata %d\n", ret);
1352                 err = ret;
1353                 goto out;
1354         }
1355
1356         path = btrfs_alloc_path();
1357         if (!path) {
1358                 fprintf(stderr, "Out of memory allocating path\n");
1359                 err = -ENOMEM;
1360                 goto out;
1361         }
1362
1363         if (walk_trees) {
1364                 ret = copy_tree_blocks(root, root->fs_info->chunk_root->node,
1365                                        &metadump, 1);
1366                 if (ret) {
1367                         err = ret;
1368                         goto out;
1369                 }
1370
1371                 ret = copy_tree_blocks(root, root->fs_info->tree_root->node,
1372                                        &metadump, 1);
1373                 if (ret) {
1374                         err = ret;
1375                         goto out;
1376                 }
1377         } else {
1378                 ret = copy_from_extent_tree(&metadump, path);
1379                 if (ret) {
1380                         err = ret;
1381                         goto out;
1382                 }
1383         }
1384
1385         ret = copy_log_trees(root, &metadump, path);
1386         if (ret) {
1387                 err = ret;
1388                 goto out;
1389         }
1390
1391         ret = copy_space_cache(root, &metadump, path);
1392 out:
1393         ret = flush_pending(&metadump, 1);
1394         if (ret) {
1395                 if (!err)
1396                         err = ret;
1397                 fprintf(stderr, "Error flushing pending %d\n", ret);
1398         }
1399
1400         metadump_destroy(&metadump, num_threads);
1401
1402         btrfs_free_path(path);
1403         ret = close_ctree(root);
1404         return err ? err : ret;
1405 }
1406
1407 static void update_super_old(u8 *buffer)
1408 {
1409         struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
1410         struct btrfs_chunk *chunk;
1411         struct btrfs_disk_key *key;
1412         u32 sectorsize = btrfs_super_sectorsize(super);
1413         u64 flags = btrfs_super_flags(super);
1414
1415         flags |= BTRFS_SUPER_FLAG_METADUMP;
1416         btrfs_set_super_flags(super, flags);
1417
1418         key = (struct btrfs_disk_key *)(super->sys_chunk_array);
1419         chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
1420                                        sizeof(struct btrfs_disk_key));
1421
1422         btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
1423         btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
1424         btrfs_set_disk_key_offset(key, 0);
1425
1426         btrfs_set_stack_chunk_length(chunk, (u64)-1);
1427         btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
1428         btrfs_set_stack_chunk_stripe_len(chunk, BTRFS_STRIPE_LEN);
1429         btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
1430         btrfs_set_stack_chunk_io_align(chunk, sectorsize);
1431         btrfs_set_stack_chunk_io_width(chunk, sectorsize);
1432         btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
1433         btrfs_set_stack_chunk_num_stripes(chunk, 1);
1434         btrfs_set_stack_chunk_sub_stripes(chunk, 0);
1435         chunk->stripe.devid = super->dev_item.devid;
1436         btrfs_set_stack_stripe_offset(&chunk->stripe, 0);
1437         memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
1438         btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
1439         csum_block(buffer, BTRFS_SUPER_INFO_SIZE);
1440 }
1441
1442 static int update_super(struct mdrestore_struct *mdres, u8 *buffer)
1443 {
1444         struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
1445         struct btrfs_chunk *chunk;
1446         struct btrfs_disk_key *disk_key;
1447         struct btrfs_key key;
1448         u64 flags = btrfs_super_flags(super);
1449         u32 new_array_size = 0;
1450         u32 array_size;
1451         u32 cur = 0;
1452         u8 *ptr, *write_ptr;
1453         int old_num_stripes;
1454
1455         write_ptr = ptr = super->sys_chunk_array;
1456         array_size = btrfs_super_sys_array_size(super);
1457
1458         while (cur < array_size) {
1459                 disk_key = (struct btrfs_disk_key *)ptr;
1460                 btrfs_disk_key_to_cpu(&key, disk_key);
1461
1462                 new_array_size += sizeof(*disk_key);
1463                 memmove(write_ptr, ptr, sizeof(*disk_key));
1464
1465                 write_ptr += sizeof(*disk_key);
1466                 ptr += sizeof(*disk_key);
1467                 cur += sizeof(*disk_key);
1468
1469                 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
1470                         u64 type, physical, physical_dup, size = 0;
1471
1472                         chunk = (struct btrfs_chunk *)ptr;
1473                         old_num_stripes = btrfs_stack_chunk_num_stripes(chunk);
1474                         chunk = (struct btrfs_chunk *)write_ptr;
1475
1476                         memmove(write_ptr, ptr, sizeof(*chunk));
1477                         btrfs_set_stack_chunk_sub_stripes(chunk, 0);
1478                         type = btrfs_stack_chunk_type(chunk);
1479                         if (type & BTRFS_BLOCK_GROUP_DUP) {
1480                                 new_array_size += sizeof(struct btrfs_stripe);
1481                                 write_ptr += sizeof(struct btrfs_stripe);
1482                         } else {
1483                                 btrfs_set_stack_chunk_num_stripes(chunk, 1);
1484                                 btrfs_set_stack_chunk_type(chunk,
1485                                                 BTRFS_BLOCK_GROUP_SYSTEM);
1486                         }
1487                         chunk->stripe.devid = super->dev_item.devid;
1488                         physical = logical_to_physical(mdres, key.offset,
1489                                                        &size, &physical_dup);
1490                         if (size != (u64)-1)
1491                                 btrfs_set_stack_stripe_offset(&chunk->stripe,
1492                                                               physical);
1493                         memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid,
1494                                BTRFS_UUID_SIZE);
1495                         new_array_size += sizeof(*chunk);
1496                 } else {
1497                         fprintf(stderr, "Bogus key in the sys chunk array "
1498                                 "%d\n", key.type);
1499                         return -EIO;
1500                 }
1501                 write_ptr += sizeof(*chunk);
1502                 ptr += btrfs_chunk_item_size(old_num_stripes);
1503                 cur += btrfs_chunk_item_size(old_num_stripes);
1504         }
1505
1506         if (mdres->clear_space_cache)
1507                 btrfs_set_super_cache_generation(super, 0);
1508
1509         flags |= BTRFS_SUPER_FLAG_METADUMP_V2;
1510         btrfs_set_super_flags(super, flags);
1511         btrfs_set_super_sys_array_size(super, new_array_size);
1512         csum_block(buffer, BTRFS_SUPER_INFO_SIZE);
1513
1514         return 0;
1515 }
1516
1517 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size)
1518 {
1519         struct extent_buffer *eb;
1520
1521         eb = calloc(1, sizeof(struct extent_buffer) + size);
1522         if (!eb)
1523                 return NULL;
1524
1525         eb->start = bytenr;
1526         eb->len = size;
1527         return eb;
1528 }
1529
1530 static void truncate_item(struct extent_buffer *eb, int slot, u32 new_size)
1531 {
1532         struct btrfs_item *item;
1533         u32 nritems;
1534         u32 old_size;
1535         u32 old_data_start;
1536         u32 size_diff;
1537         u32 data_end;
1538         int i;
1539
1540         old_size = btrfs_item_size_nr(eb, slot);
1541         if (old_size == new_size)
1542                 return;
1543
1544         nritems = btrfs_header_nritems(eb);
1545         data_end = btrfs_item_offset_nr(eb, nritems - 1);
1546
1547         old_data_start = btrfs_item_offset_nr(eb, slot);
1548         size_diff = old_size - new_size;
1549
1550         for (i = slot; i < nritems; i++) {
1551                 u32 ioff;
1552                 item = btrfs_item_nr(i);
1553                 ioff = btrfs_item_offset(eb, item);
1554                 btrfs_set_item_offset(eb, item, ioff + size_diff);
1555         }
1556
1557         memmove_extent_buffer(eb, btrfs_leaf_data(eb) + data_end + size_diff,
1558                               btrfs_leaf_data(eb) + data_end,
1559                               old_data_start + new_size - data_end);
1560         item = btrfs_item_nr(slot);
1561         btrfs_set_item_size(eb, item, new_size);
1562 }
1563
1564 static int fixup_chunk_tree_block(struct mdrestore_struct *mdres,
1565                                   struct async_work *async, u8 *buffer,
1566                                   size_t size)
1567 {
1568         struct extent_buffer *eb;
1569         size_t size_left = size;
1570         u64 bytenr = async->start;
1571         int i;
1572
1573         if (size_left % mdres->nodesize)
1574                 return 0;
1575
1576         eb = alloc_dummy_eb(bytenr, mdres->nodesize);
1577         if (!eb)
1578                 return -ENOMEM;
1579
1580         while (size_left) {
1581                 eb->start = bytenr;
1582                 memcpy(eb->data, buffer, mdres->nodesize);
1583
1584                 if (btrfs_header_bytenr(eb) != bytenr)
1585                         break;
1586                 if (memcmp(mdres->fsid,
1587                            eb->data + offsetof(struct btrfs_header, fsid),
1588                            BTRFS_FSID_SIZE))
1589                         break;
1590
1591                 if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID)
1592                         goto next;
1593
1594                 if (btrfs_header_level(eb) != 0)
1595                         goto next;
1596
1597                 for (i = 0; i < btrfs_header_nritems(eb); i++) {
1598                         struct btrfs_chunk *chunk;
1599                         struct btrfs_key key;
1600                         u64 type, physical, physical_dup, size = (u64)-1;
1601
1602                         btrfs_item_key_to_cpu(eb, &key, i);
1603                         if (key.type != BTRFS_CHUNK_ITEM_KEY)
1604                                 continue;
1605
1606                         size = 0;
1607                         physical = logical_to_physical(mdres, key.offset,
1608                                                        &size, &physical_dup);
1609
1610                         if (!physical_dup)
1611                                 truncate_item(eb, i, sizeof(*chunk));
1612                         chunk = btrfs_item_ptr(eb, i, struct btrfs_chunk);
1613
1614
1615                         /* Zero out the RAID profile */
1616                         type = btrfs_chunk_type(eb, chunk);
1617                         type &= (BTRFS_BLOCK_GROUP_DATA |
1618                                  BTRFS_BLOCK_GROUP_SYSTEM |
1619                                  BTRFS_BLOCK_GROUP_METADATA |
1620                                  BTRFS_BLOCK_GROUP_DUP);
1621                         btrfs_set_chunk_type(eb, chunk, type);
1622
1623                         if (!physical_dup)
1624                                 btrfs_set_chunk_num_stripes(eb, chunk, 1);
1625                         btrfs_set_chunk_sub_stripes(eb, chunk, 0);
1626                         btrfs_set_stripe_devid_nr(eb, chunk, 0, mdres->devid);
1627                         if (size != (u64)-1)
1628                                 btrfs_set_stripe_offset_nr(eb, chunk, 0,
1629                                                            physical);
1630                         /* update stripe 2 offset */
1631                         if (physical_dup)
1632                                 btrfs_set_stripe_offset_nr(eb, chunk, 1,
1633                                                            physical_dup);
1634
1635                         write_extent_buffer(eb, mdres->uuid,
1636                                         (unsigned long)btrfs_stripe_dev_uuid_nr(
1637                                                 chunk, 0),
1638                                         BTRFS_UUID_SIZE);
1639                 }
1640                 memcpy(buffer, eb->data, eb->len);
1641                 csum_block(buffer, eb->len);
1642 next:
1643                 size_left -= mdres->nodesize;
1644                 buffer += mdres->nodesize;
1645                 bytenr += mdres->nodesize;
1646         }
1647
1648         free(eb);
1649         return 0;
1650 }
1651
1652 static void write_backup_supers(int fd, u8 *buf)
1653 {
1654         struct btrfs_super_block *super = (struct btrfs_super_block *)buf;
1655         struct stat st;
1656         u64 size;
1657         u64 bytenr;
1658         int i;
1659         int ret;
1660
1661         if (fstat(fd, &st)) {
1662                 fprintf(stderr, "Couldn't stat restore point, won't be able "
1663                         "to write backup supers: %d\n", errno);
1664                 return;
1665         }
1666
1667         size = btrfs_device_size(fd, &st);
1668
1669         for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1670                 bytenr = btrfs_sb_offset(i);
1671                 if (bytenr + BTRFS_SUPER_INFO_SIZE > size)
1672                         break;
1673                 btrfs_set_super_bytenr(super, bytenr);
1674                 csum_block(buf, BTRFS_SUPER_INFO_SIZE);
1675                 ret = pwrite64(fd, buf, BTRFS_SUPER_INFO_SIZE, bytenr);
1676                 if (ret < BTRFS_SUPER_INFO_SIZE) {
1677                         if (ret < 0)
1678                                 fprintf(stderr, "Problem writing out backup "
1679                                         "super block %d, err %d\n", i, errno);
1680                         else
1681                                 fprintf(stderr, "Short write writing out "
1682                                         "backup super block\n");
1683                         break;
1684                 }
1685         }
1686 }
1687
1688 static void *restore_worker(void *data)
1689 {
1690         struct mdrestore_struct *mdres = (struct mdrestore_struct *)data;
1691         struct async_work *async;
1692         size_t size;
1693         u8 *buffer;
1694         u8 *outbuf;
1695         int outfd;
1696         int ret;
1697         int compress_size = MAX_PENDING_SIZE * 4;
1698
1699         outfd = fileno(mdres->out);
1700         buffer = malloc(compress_size);
1701         if (!buffer) {
1702                 fprintf(stderr, "Error allocating buffer\n");
1703                 pthread_mutex_lock(&mdres->mutex);
1704                 if (!mdres->error)
1705                         mdres->error = -ENOMEM;
1706                 pthread_mutex_unlock(&mdres->mutex);
1707                 pthread_exit(NULL);
1708         }
1709
1710         while (1) {
1711                 u64 bytenr, physical_dup;
1712                 off_t offset = 0;
1713                 int err = 0;
1714
1715                 pthread_mutex_lock(&mdres->mutex);
1716                 while (!mdres->nodesize || list_empty(&mdres->list)) {
1717                         if (mdres->done) {
1718                                 pthread_mutex_unlock(&mdres->mutex);
1719                                 goto out;
1720                         }
1721                         pthread_cond_wait(&mdres->cond, &mdres->mutex);
1722                 }
1723                 async = list_entry(mdres->list.next, struct async_work, list);
1724                 list_del_init(&async->list);
1725                 pthread_mutex_unlock(&mdres->mutex);
1726
1727                 if (mdres->compress_method == COMPRESS_ZLIB) {
1728                         size = compress_size; 
1729                         ret = uncompress(buffer, (unsigned long *)&size,
1730                                          async->buffer, async->bufsize);
1731                         if (ret != Z_OK) {
1732                                 fprintf(stderr, "Error decompressing %d\n",
1733                                         ret);
1734                                 err = -EIO;
1735                         }
1736                         outbuf = buffer;
1737                 } else {
1738                         outbuf = async->buffer;
1739                         size = async->bufsize;
1740                 }
1741
1742                 if (!mdres->multi_devices) {
1743                         if (async->start == BTRFS_SUPER_INFO_OFFSET) {
1744                                 if (mdres->old_restore) {
1745                                         update_super_old(outbuf);
1746                                 } else {
1747                                         ret = update_super(mdres, outbuf);
1748                                         if (ret)
1749                                                 err = ret;
1750                                 }
1751                         } else if (!mdres->old_restore) {
1752                                 ret = fixup_chunk_tree_block(mdres, async, outbuf, size);
1753                                 if (ret)
1754                                         err = ret;
1755                         }
1756                 }
1757
1758                 if (!mdres->fixup_offset) {
1759                         while (size) {
1760                                 u64 chunk_size = size;
1761                                 physical_dup = 0;
1762                                 if (!mdres->multi_devices && !mdres->old_restore)
1763                                         bytenr = logical_to_physical(mdres,
1764                                                      async->start + offset,
1765                                                      &chunk_size,
1766                                                      &physical_dup);
1767                                 else
1768                                         bytenr = async->start + offset;
1769
1770                                 ret = pwrite64(outfd, outbuf+offset, chunk_size,
1771                                                bytenr);
1772                                 if (ret != chunk_size)
1773                                         goto error;
1774
1775                                 if (physical_dup)
1776                                         ret = pwrite64(outfd, outbuf+offset,
1777                                                        chunk_size,
1778                                                        physical_dup);
1779                                 if (ret != chunk_size)
1780                                         goto error;
1781
1782                                 size -= chunk_size;
1783                                 offset += chunk_size;
1784                                 continue;
1785
1786 error:
1787                                 if (ret < 0) {
1788                                         fprintf(stderr, "Error writing to device %d\n",
1789                                                         errno);
1790                                         err = errno;
1791                                 } else {
1792                                         fprintf(stderr, "Short write\n");
1793                                         err = -EIO;
1794                                 }
1795                         }
1796                 } else if (async->start != BTRFS_SUPER_INFO_OFFSET) {
1797                         ret = write_data_to_disk(mdres->info, outbuf, async->start, size, 0);
1798                         if (ret) {
1799                                 printk("Error write data\n");
1800                                 exit(1);
1801                         }
1802                 }
1803
1804
1805                 /* backup super blocks are already there at fixup_offset stage */
1806                 if (!mdres->multi_devices && async->start == BTRFS_SUPER_INFO_OFFSET)
1807                         write_backup_supers(outfd, outbuf);
1808
1809                 pthread_mutex_lock(&mdres->mutex);
1810                 if (err && !mdres->error)
1811                         mdres->error = err;
1812                 mdres->num_items--;
1813                 pthread_mutex_unlock(&mdres->mutex);
1814
1815                 free(async->buffer);
1816                 free(async);
1817         }
1818 out:
1819         free(buffer);
1820         pthread_exit(NULL);
1821 }
1822
1823 static void mdrestore_destroy(struct mdrestore_struct *mdres, int num_threads)
1824 {
1825         struct rb_node *n;
1826         int i;
1827
1828         while ((n = rb_first(&mdres->chunk_tree))) {
1829                 struct fs_chunk *entry;
1830
1831                 entry = rb_entry(n, struct fs_chunk, l);
1832                 rb_erase(n, &mdres->chunk_tree);
1833                 rb_erase(&entry->p, &mdres->physical_tree);
1834                 free(entry);
1835         }
1836         pthread_mutex_lock(&mdres->mutex);
1837         mdres->done = 1;
1838         pthread_cond_broadcast(&mdres->cond);
1839         pthread_mutex_unlock(&mdres->mutex);
1840
1841         for (i = 0; i < num_threads; i++)
1842                 pthread_join(mdres->threads[i], NULL);
1843
1844         pthread_cond_destroy(&mdres->cond);
1845         pthread_mutex_destroy(&mdres->mutex);
1846         free(mdres->threads);
1847 }
1848
1849 static int mdrestore_init(struct mdrestore_struct *mdres,
1850                           FILE *in, FILE *out, int old_restore,
1851                           int num_threads, int fixup_offset,
1852                           struct btrfs_fs_info *info, int multi_devices)
1853 {
1854         int i, ret = 0;
1855
1856         memset(mdres, 0, sizeof(*mdres));
1857         pthread_cond_init(&mdres->cond, NULL);
1858         pthread_mutex_init(&mdres->mutex, NULL);
1859         INIT_LIST_HEAD(&mdres->list);
1860         INIT_LIST_HEAD(&mdres->overlapping_chunks);
1861         mdres->in = in;
1862         mdres->out = out;
1863         mdres->old_restore = old_restore;
1864         mdres->chunk_tree.rb_node = NULL;
1865         mdres->fixup_offset = fixup_offset;
1866         mdres->info = info;
1867         mdres->multi_devices = multi_devices;
1868         mdres->clear_space_cache = 0;
1869         mdres->last_physical_offset = 0;
1870         mdres->alloced_chunks = 0;
1871
1872         if (!num_threads)
1873                 return 0;
1874
1875         mdres->num_threads = num_threads;
1876         mdres->threads = calloc(num_threads, sizeof(pthread_t));
1877         if (!mdres->threads)
1878                 return -ENOMEM;
1879         for (i = 0; i < num_threads; i++) {
1880                 ret = pthread_create(mdres->threads + i, NULL, restore_worker,
1881                                      mdres);
1882                 if (ret)
1883                         break;
1884         }
1885         if (ret)
1886                 mdrestore_destroy(mdres, i + 1);
1887         return ret;
1888 }
1889
1890 static int fill_mdres_info(struct mdrestore_struct *mdres,
1891                            struct async_work *async)
1892 {
1893         struct btrfs_super_block *super;
1894         u8 *buffer = NULL;
1895         u8 *outbuf;
1896         int ret;
1897
1898         /* We've already been initialized */
1899         if (mdres->nodesize)
1900                 return 0;
1901
1902         if (mdres->compress_method == COMPRESS_ZLIB) {
1903                 size_t size = MAX_PENDING_SIZE * 2;
1904
1905                 buffer = malloc(MAX_PENDING_SIZE * 2);
1906                 if (!buffer)
1907                         return -ENOMEM;
1908                 ret = uncompress(buffer, (unsigned long *)&size,
1909                                  async->buffer, async->bufsize);
1910                 if (ret != Z_OK) {
1911                         fprintf(stderr, "Error decompressing %d\n", ret);
1912                         free(buffer);
1913                         return -EIO;
1914                 }
1915                 outbuf = buffer;
1916         } else {
1917                 outbuf = async->buffer;
1918         }
1919
1920         super = (struct btrfs_super_block *)outbuf;
1921         mdres->nodesize = btrfs_super_nodesize(super);
1922         memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
1923         memcpy(mdres->uuid, super->dev_item.uuid,
1924                        BTRFS_UUID_SIZE);
1925         mdres->devid = le64_to_cpu(super->dev_item.devid);
1926         free(buffer);
1927         return 0;
1928 }
1929
1930 static int add_cluster(struct meta_cluster *cluster,
1931                        struct mdrestore_struct *mdres, u64 *next)
1932 {
1933         struct meta_cluster_item *item;
1934         struct meta_cluster_header *header = &cluster->header;
1935         struct async_work *async;
1936         u64 bytenr;
1937         u32 i, nritems;
1938         int ret;
1939
1940         mdres->compress_method = header->compress;
1941
1942         bytenr = le64_to_cpu(header->bytenr) + BLOCK_SIZE;
1943         nritems = le32_to_cpu(header->nritems);
1944         for (i = 0; i < nritems; i++) {
1945                 item = &cluster->items[i];
1946                 async = calloc(1, sizeof(*async));
1947                 if (!async) {
1948                         fprintf(stderr, "Error allocating async\n");
1949                         return -ENOMEM;
1950                 }
1951                 async->start = le64_to_cpu(item->bytenr);
1952                 async->bufsize = le32_to_cpu(item->size);
1953                 async->buffer = malloc(async->bufsize);
1954                 if (!async->buffer) {
1955                         fprintf(stderr, "Error allocating async buffer\n");
1956                         free(async);
1957                         return -ENOMEM;
1958                 }
1959                 ret = fread(async->buffer, async->bufsize, 1, mdres->in);
1960                 if (ret != 1) {
1961                         fprintf(stderr, "Error reading buffer %d\n", errno);
1962                         free(async->buffer);
1963                         free(async);
1964                         return -EIO;
1965                 }
1966                 bytenr += async->bufsize;
1967
1968                 pthread_mutex_lock(&mdres->mutex);
1969                 if (async->start == BTRFS_SUPER_INFO_OFFSET) {
1970                         ret = fill_mdres_info(mdres, async);
1971                         if (ret) {
1972                                 fprintf(stderr, "Error setting up restore\n");
1973                                 pthread_mutex_unlock(&mdres->mutex);
1974                                 free(async->buffer);
1975                                 free(async);
1976                                 return ret;
1977                         }
1978                 }
1979                 list_add_tail(&async->list, &mdres->list);
1980                 mdres->num_items++;
1981                 pthread_cond_signal(&mdres->cond);
1982                 pthread_mutex_unlock(&mdres->mutex);
1983         }
1984         if (bytenr & BLOCK_MASK) {
1985                 char buffer[BLOCK_MASK];
1986                 size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);
1987
1988                 bytenr += size;
1989                 ret = fread(buffer, size, 1, mdres->in);
1990                 if (ret != 1) {
1991                         fprintf(stderr, "Error reading in buffer %d\n", errno);
1992                         return -EIO;
1993                 }
1994         }
1995         *next = bytenr;
1996         return 0;
1997 }
1998
1999 static int wait_for_worker(struct mdrestore_struct *mdres)
2000 {
2001         int ret = 0;
2002
2003         pthread_mutex_lock(&mdres->mutex);
2004         ret = mdres->error;
2005         while (!ret && mdres->num_items > 0) {
2006                 struct timespec ts = {
2007                         .tv_sec = 0,
2008                         .tv_nsec = 10000000,
2009                 };
2010                 pthread_mutex_unlock(&mdres->mutex);
2011                 nanosleep(&ts, NULL);
2012                 pthread_mutex_lock(&mdres->mutex);
2013                 ret = mdres->error;
2014         }
2015         pthread_mutex_unlock(&mdres->mutex);
2016         return ret;
2017 }
2018
2019 static int read_chunk_block(struct mdrestore_struct *mdres, u8 *buffer,
2020                             u64 bytenr, u64 item_bytenr, u32 bufsize,
2021                             u64 cluster_bytenr)
2022 {
2023         struct extent_buffer *eb;
2024         int ret = 0;
2025         int i;
2026
2027         eb = alloc_dummy_eb(bytenr, mdres->nodesize);
2028         if (!eb) {
2029                 ret = -ENOMEM;
2030                 goto out;
2031         }
2032
2033         while (item_bytenr != bytenr) {
2034                 buffer += mdres->nodesize;
2035                 item_bytenr += mdres->nodesize;
2036         }
2037
2038         memcpy(eb->data, buffer, mdres->nodesize);
2039         if (btrfs_header_bytenr(eb) != bytenr) {
2040                 fprintf(stderr, "Eb bytenr doesn't match found bytenr\n");
2041                 ret = -EIO;
2042                 goto out;
2043         }
2044
2045         if (memcmp(mdres->fsid, eb->data + offsetof(struct btrfs_header, fsid),
2046                    BTRFS_FSID_SIZE)) {
2047                 fprintf(stderr, "Fsid doesn't match\n");
2048                 ret = -EIO;
2049                 goto out;
2050         }
2051
2052         if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID) {
2053                 fprintf(stderr, "Does not belong to the chunk tree\n");
2054                 ret = -EIO;
2055                 goto out;
2056         }
2057
2058         for (i = 0; i < btrfs_header_nritems(eb); i++) {
2059                 struct btrfs_chunk *chunk;
2060                 struct fs_chunk *fs_chunk;
2061                 struct btrfs_key key;
2062                 u64 type;
2063
2064                 if (btrfs_header_level(eb)) {
2065                         u64 blockptr = btrfs_node_blockptr(eb, i);
2066
2067                         ret = search_for_chunk_blocks(mdres, blockptr,
2068                                                       cluster_bytenr);
2069                         if (ret)
2070                                 break;
2071                         continue;
2072                 }
2073
2074                 /* Yay a leaf!  We loves leafs! */
2075                 btrfs_item_key_to_cpu(eb, &key, i);
2076                 if (key.type != BTRFS_CHUNK_ITEM_KEY)
2077                         continue;
2078
2079                 fs_chunk = malloc(sizeof(struct fs_chunk));
2080                 if (!fs_chunk) {
2081                         fprintf(stderr, "Error allocating chunk\n");
2082                         ret = -ENOMEM;
2083                         break;
2084                 }
2085                 memset(fs_chunk, 0, sizeof(*fs_chunk));
2086                 chunk = btrfs_item_ptr(eb, i, struct btrfs_chunk);
2087
2088                 fs_chunk->logical = key.offset;
2089                 fs_chunk->physical = btrfs_stripe_offset_nr(eb, chunk, 0);
2090                 fs_chunk->bytes = btrfs_chunk_length(eb, chunk);
2091                 INIT_LIST_HEAD(&fs_chunk->list);
2092                 if (tree_search(&mdres->physical_tree, &fs_chunk->p,
2093                                 physical_cmp, 1) != NULL)
2094                         list_add(&fs_chunk->list, &mdres->overlapping_chunks);
2095                 else
2096                         tree_insert(&mdres->physical_tree, &fs_chunk->p,
2097                                     physical_cmp);
2098
2099                 type = btrfs_chunk_type(eb, chunk);
2100                 if (type & BTRFS_BLOCK_GROUP_DUP) {
2101                         fs_chunk->physical_dup =
2102                                         btrfs_stripe_offset_nr(eb, chunk, 1);
2103                 }
2104
2105                 if (fs_chunk->physical_dup + fs_chunk->bytes >
2106                     mdres->last_physical_offset)
2107                         mdres->last_physical_offset = fs_chunk->physical_dup +
2108                                 fs_chunk->bytes;
2109                 else if (fs_chunk->physical + fs_chunk->bytes >
2110                     mdres->last_physical_offset)
2111                         mdres->last_physical_offset = fs_chunk->physical +
2112                                 fs_chunk->bytes;
2113                 mdres->alloced_chunks += fs_chunk->bytes;
2114                 /* in dup case, fs_chunk->bytes should add twice */
2115                 if (fs_chunk->physical_dup)
2116                         mdres->alloced_chunks += fs_chunk->bytes;
2117                 tree_insert(&mdres->chunk_tree, &fs_chunk->l, chunk_cmp);
2118         }
2119 out:
2120         free(eb);
2121         return ret;
2122 }
2123
2124 /* If you have to ask you aren't worthy */
2125 static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
2126                                    u64 search, u64 cluster_bytenr)
2127 {
2128         struct meta_cluster *cluster;
2129         struct meta_cluster_header *header;
2130         struct meta_cluster_item *item;
2131         u64 current_cluster = cluster_bytenr, bytenr;
2132         u64 item_bytenr;
2133         u32 bufsize, nritems, i;
2134         u32 max_size = MAX_PENDING_SIZE * 2;
2135         u8 *buffer, *tmp = NULL;
2136         int ret = 0;
2137
2138         cluster = malloc(BLOCK_SIZE);
2139         if (!cluster) {
2140                 fprintf(stderr, "Error allocating cluster\n");
2141                 return -ENOMEM;
2142         }
2143
2144         buffer = malloc(max_size);
2145         if (!buffer) {
2146                 fprintf(stderr, "Error allocating buffer\n");
2147                 free(cluster);
2148                 return -ENOMEM;
2149         }
2150
2151         if (mdres->compress_method == COMPRESS_ZLIB) {
2152                 tmp = malloc(max_size);
2153                 if (!tmp) {
2154                         fprintf(stderr, "Error allocating tmp buffer\n");
2155                         free(cluster);
2156                         free(buffer);
2157                         return -ENOMEM;
2158                 }
2159         }
2160
2161         bytenr = current_cluster;
2162         while (1) {
2163                 if (fseek(mdres->in, current_cluster, SEEK_SET)) {
2164                         fprintf(stderr, "Error seeking: %d\n", errno);
2165                         ret = -EIO;
2166                         break;
2167                 }
2168
2169                 ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
2170                 if (ret == 0) {
2171                         if (cluster_bytenr != 0) {
2172                                 cluster_bytenr = 0;
2173                                 current_cluster = 0;
2174                                 bytenr = 0;
2175                                 continue;
2176                         }
2177                         printf("ok this is where we screwed up?\n");
2178                         ret = -EIO;
2179                         break;
2180                 } else if (ret < 0) {
2181                         fprintf(stderr, "Error reading image\n");
2182                         break;
2183                 }
2184                 ret = 0;
2185
2186                 header = &cluster->header;
2187                 if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
2188                     le64_to_cpu(header->bytenr) != current_cluster) {
2189                         fprintf(stderr, "bad header in metadump image\n");
2190                         ret = -EIO;
2191                         break;
2192                 }
2193
2194                 bytenr += BLOCK_SIZE;
2195                 nritems = le32_to_cpu(header->nritems);
2196                 for (i = 0; i < nritems; i++) {
2197                         size_t size;
2198
2199                         item = &cluster->items[i];
2200                         bufsize = le32_to_cpu(item->size);
2201                         item_bytenr = le64_to_cpu(item->bytenr);
2202
2203                         if (bufsize > max_size) {
2204                                 fprintf(stderr, "item %u size %u too big\n",
2205                                         i, bufsize);
2206                                 ret = -EIO;
2207                                 break;
2208                         }
2209
2210                         if (mdres->compress_method == COMPRESS_ZLIB) {
2211                                 ret = fread(tmp, bufsize, 1, mdres->in);
2212                                 if (ret != 1) {
2213                                         fprintf(stderr, "Error reading: %d\n",
2214                                                 errno);
2215                                         ret = -EIO;
2216                                         break;
2217                                 }
2218
2219                                 size = max_size;
2220                                 ret = uncompress(buffer,
2221                                                  (unsigned long *)&size, tmp,
2222                                                  bufsize);
2223                                 if (ret != Z_OK) {
2224                                         fprintf(stderr, "Error decompressing "
2225                                                 "%d\n", ret);
2226                                         ret = -EIO;
2227                                         break;
2228                                 }
2229                         } else {
2230                                 ret = fread(buffer, bufsize, 1, mdres->in);
2231                                 if (ret != 1) {
2232                                         fprintf(stderr, "Error reading: %d\n",
2233                                                 errno);
2234                                         ret = -EIO;
2235                                         break;
2236                                 }
2237                                 size = bufsize;
2238                         }
2239                         ret = 0;
2240
2241                         if (item_bytenr <= search &&
2242                             item_bytenr + size > search) {
2243                                 ret = read_chunk_block(mdres, buffer, search,
2244                                                        item_bytenr, size,
2245                                                        current_cluster);
2246                                 if (!ret)
2247                                         ret = 1;
2248                                 break;
2249                         }
2250                         bytenr += bufsize;
2251                 }
2252                 if (ret) {
2253                         if (ret > 0)
2254                                 ret = 0;
2255                         break;
2256                 }
2257                 if (bytenr & BLOCK_MASK)
2258                         bytenr += BLOCK_SIZE - (bytenr & BLOCK_MASK);
2259                 current_cluster = bytenr;
2260         }
2261
2262         free(tmp);
2263         free(buffer);
2264         free(cluster);
2265         return ret;
2266 }
2267
2268 static int build_chunk_tree(struct mdrestore_struct *mdres,
2269                             struct meta_cluster *cluster)
2270 {
2271         struct btrfs_super_block *super;
2272         struct meta_cluster_header *header;
2273         struct meta_cluster_item *item = NULL;
2274         u64 chunk_root_bytenr = 0;
2275         u32 i, nritems;
2276         u64 bytenr = 0;
2277         u8 *buffer;
2278         int ret;
2279
2280         /* We can't seek with stdin so don't bother doing this */
2281         if (mdres->in == stdin)
2282                 return 0;
2283
2284         ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
2285         if (ret <= 0) {
2286                 fprintf(stderr, "Error reading in cluster: %d\n", errno);
2287                 return -EIO;
2288         }
2289         ret = 0;
2290
2291         header = &cluster->header;
2292         if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
2293             le64_to_cpu(header->bytenr) != 0) {
2294                 fprintf(stderr, "bad header in metadump image\n");
2295                 return -EIO;
2296         }
2297
2298         bytenr += BLOCK_SIZE;
2299         mdres->compress_method = header->compress;
2300         nritems = le32_to_cpu(header->nritems);
2301         for (i = 0; i < nritems; i++) {
2302                 item = &cluster->items[i];
2303
2304                 if (le64_to_cpu(item->bytenr) == BTRFS_SUPER_INFO_OFFSET)
2305                         break;
2306                 bytenr += le32_to_cpu(item->size);
2307                 if (fseek(mdres->in, le32_to_cpu(item->size), SEEK_CUR)) {
2308                         fprintf(stderr, "Error seeking: %d\n", errno);
2309                         return -EIO;
2310                 }
2311         }
2312
2313         if (!item || le64_to_cpu(item->bytenr) != BTRFS_SUPER_INFO_OFFSET) {
2314                 fprintf(stderr, "Huh, didn't find the super?\n");
2315                 return -EINVAL;
2316         }
2317
2318         buffer = malloc(le32_to_cpu(item->size));
2319         if (!buffer) {
2320                 fprintf(stderr, "Error allocating buffer\n");
2321                 return -ENOMEM;
2322         }
2323
2324         ret = fread(buffer, le32_to_cpu(item->size), 1, mdres->in);
2325         if (ret != 1) {
2326                 fprintf(stderr, "Error reading buffer: %d\n", errno);
2327                 free(buffer);
2328                 return -EIO;
2329         }
2330
2331         if (mdres->compress_method == COMPRESS_ZLIB) {
2332                 size_t size = MAX_PENDING_SIZE * 2;
2333                 u8 *tmp;
2334
2335                 tmp = malloc(MAX_PENDING_SIZE * 2);
2336                 if (!tmp) {
2337                         free(buffer);
2338                         return -ENOMEM;
2339                 }
2340                 ret = uncompress(tmp, (unsigned long *)&size,
2341                                  buffer, le32_to_cpu(item->size));
2342                 if (ret != Z_OK) {
2343                         fprintf(stderr, "Error decompressing %d\n", ret);
2344                         free(buffer);
2345                         free(tmp);
2346                         return -EIO;
2347                 }
2348                 free(buffer);
2349                 buffer = tmp;
2350         }
2351
2352         pthread_mutex_lock(&mdres->mutex);
2353         super = (struct btrfs_super_block *)buffer;
2354         chunk_root_bytenr = btrfs_super_chunk_root(super);
2355         mdres->nodesize = btrfs_super_nodesize(super);
2356         memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
2357         memcpy(mdres->uuid, super->dev_item.uuid,
2358                        BTRFS_UUID_SIZE);
2359         mdres->devid = le64_to_cpu(super->dev_item.devid);
2360         free(buffer);
2361         pthread_mutex_unlock(&mdres->mutex);
2362
2363         return search_for_chunk_blocks(mdres, chunk_root_bytenr, 0);
2364 }
2365
2366 static int range_contains_super(u64 physical, u64 bytes)
2367 {
2368         u64 super_bytenr;
2369         int i;
2370
2371         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2372                 super_bytenr = btrfs_sb_offset(i);
2373                 if (super_bytenr >= physical &&
2374                     super_bytenr < physical + bytes)
2375                         return 1;
2376         }
2377
2378         return 0;
2379 }
2380
2381 static void remap_overlapping_chunks(struct mdrestore_struct *mdres)
2382 {
2383         struct fs_chunk *fs_chunk;
2384
2385         while (!list_empty(&mdres->overlapping_chunks)) {
2386                 fs_chunk = list_first_entry(&mdres->overlapping_chunks,
2387                                             struct fs_chunk, list);
2388                 list_del_init(&fs_chunk->list);
2389                 if (range_contains_super(fs_chunk->physical,
2390                                          fs_chunk->bytes)) {
2391                         fprintf(stderr, "Remapping a chunk that had a super "
2392                                 "mirror inside of it, clearing space cache "
2393                                 "so we don't end up with corruption\n");
2394                         mdres->clear_space_cache = 1;
2395                 }
2396                 fs_chunk->physical = mdres->last_physical_offset;
2397                 tree_insert(&mdres->physical_tree, &fs_chunk->p, physical_cmp);
2398                 mdres->last_physical_offset += fs_chunk->bytes;
2399         }
2400 }
2401
2402 static int fixup_devices(struct btrfs_fs_info *fs_info,
2403                          struct mdrestore_struct *mdres, off_t dev_size)
2404 {
2405         struct btrfs_trans_handle *trans;
2406         struct btrfs_dev_item *dev_item;
2407         struct btrfs_path *path;
2408         struct extent_buffer *leaf;
2409         struct btrfs_root *root = fs_info->chunk_root;
2410         struct btrfs_key key;
2411         u64 devid, cur_devid;
2412         int ret;
2413
2414         path = btrfs_alloc_path();
2415         if (!path) {
2416                 fprintf(stderr, "Error allocating path\n");
2417                 return -ENOMEM;
2418         }
2419
2420         trans = btrfs_start_transaction(fs_info->tree_root, 1);
2421         if (IS_ERR(trans)) {
2422                 fprintf(stderr, "Error starting transaction %ld\n",
2423                         PTR_ERR(trans));
2424                 btrfs_free_path(path);
2425                 return PTR_ERR(trans);
2426         }
2427
2428         dev_item = &fs_info->super_copy->dev_item;
2429
2430         devid = btrfs_stack_device_id(dev_item);
2431
2432         btrfs_set_stack_device_total_bytes(dev_item, dev_size);
2433         btrfs_set_stack_device_bytes_used(dev_item, mdres->alloced_chunks);
2434
2435         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2436         key.type = BTRFS_DEV_ITEM_KEY;
2437         key.offset = 0;
2438
2439 again:
2440         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2441         if (ret < 0) {
2442                 fprintf(stderr, "search failed %d\n", ret);
2443                 exit(1);
2444         }
2445
2446         while (1) {
2447                 leaf = path->nodes[0];
2448                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
2449                         ret = btrfs_next_leaf(root, path);
2450                         if (ret < 0) {
2451                                 fprintf(stderr, "Error going to next leaf "
2452                                         "%d\n", ret);
2453                                 exit(1);
2454                         }
2455                         if (ret > 0) {
2456                                 ret = 0;
2457                                 break;
2458                         }
2459                         leaf = path->nodes[0];
2460                 }
2461
2462                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2463                 if (key.type > BTRFS_DEV_ITEM_KEY)
2464                         break;
2465                 if (key.type != BTRFS_DEV_ITEM_KEY) {
2466                         path->slots[0]++;
2467                         continue;
2468                 }
2469
2470                 dev_item = btrfs_item_ptr(leaf, path->slots[0],
2471                                           struct btrfs_dev_item);
2472                 cur_devid = btrfs_device_id(leaf, dev_item);
2473                 if (devid != cur_devid) {
2474                         ret = btrfs_del_item(trans, root, path);
2475                         if (ret) {
2476                                 fprintf(stderr, "Error deleting item %d\n",
2477                                         ret);
2478                                 exit(1);
2479                         }
2480                         btrfs_release_path(path);
2481                         goto again;
2482                 }
2483
2484                 btrfs_set_device_total_bytes(leaf, dev_item, dev_size);
2485                 btrfs_set_device_bytes_used(leaf, dev_item,
2486                                             mdres->alloced_chunks);
2487                 btrfs_mark_buffer_dirty(leaf);
2488                 path->slots[0]++;
2489         }
2490
2491         btrfs_free_path(path);
2492         ret = btrfs_commit_transaction(trans, fs_info->tree_root);
2493         if (ret) {
2494                 fprintf(stderr, "Commit failed %d\n", ret);
2495                 return ret;
2496         }
2497         return 0;
2498 }
2499
2500 static int restore_metadump(const char *input, FILE *out, int old_restore,
2501                             int num_threads, int fixup_offset,
2502                             const char *target, int multi_devices)
2503 {
2504         struct meta_cluster *cluster = NULL;
2505         struct meta_cluster_header *header;
2506         struct mdrestore_struct mdrestore;
2507         struct btrfs_fs_info *info = NULL;
2508         u64 bytenr = 0;
2509         FILE *in = NULL;
2510         int ret = 0;
2511
2512         if (!strcmp(input, "-")) {
2513                 in = stdin;
2514         } else {
2515                 in = fopen(input, "r");
2516                 if (!in) {
2517                         perror("unable to open metadump image");
2518                         return 1;
2519                 }
2520         }
2521
2522         /* NOTE: open with write mode */
2523         if (fixup_offset) {
2524                 BUG_ON(!target);
2525                 info = open_ctree_fs_info(target, 0, 0, 0,
2526                                           OPEN_CTREE_WRITES |
2527                                           OPEN_CTREE_RESTORE |
2528                                           OPEN_CTREE_PARTIAL);
2529                 if (!info) {
2530                         fprintf(stderr, "%s: open ctree failed\n", __func__);
2531                         ret = -EIO;
2532                         goto failed_open;
2533                 }
2534         }
2535
2536         cluster = malloc(BLOCK_SIZE);
2537         if (!cluster) {
2538                 fprintf(stderr, "Error allocating cluster\n");
2539                 ret = -ENOMEM;
2540                 goto failed_info;
2541         }
2542
2543         ret = mdrestore_init(&mdrestore, in, out, old_restore, num_threads,
2544                              fixup_offset, info, multi_devices);
2545         if (ret) {
2546                 fprintf(stderr, "Error initializing mdrestore %d\n", ret);
2547                 goto failed_cluster;
2548         }
2549
2550         if (!multi_devices && !old_restore) {
2551                 ret = build_chunk_tree(&mdrestore, cluster);
2552                 if (ret)
2553                         goto out;
2554                 if (!list_empty(&mdrestore.overlapping_chunks))
2555                         remap_overlapping_chunks(&mdrestore);
2556         }
2557
2558         if (in != stdin && fseek(in, 0, SEEK_SET)) {
2559                 fprintf(stderr, "Error seeking %d\n", errno);
2560                 goto out;
2561         }
2562
2563         while (!mdrestore.error) {
2564                 ret = fread(cluster, BLOCK_SIZE, 1, in);
2565                 if (!ret)
2566                         break;
2567
2568                 header = &cluster->header;
2569                 if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
2570                     le64_to_cpu(header->bytenr) != bytenr) {
2571                         fprintf(stderr, "bad header in metadump image\n");
2572                         ret = -EIO;
2573                         break;
2574                 }
2575                 ret = add_cluster(cluster, &mdrestore, &bytenr);
2576                 if (ret) {
2577                         fprintf(stderr, "Error adding cluster\n");
2578                         break;
2579                 }
2580         }
2581         ret = wait_for_worker(&mdrestore);
2582
2583         if (!ret && !multi_devices && !old_restore) {
2584                 struct btrfs_root *root;
2585                 struct stat st;
2586
2587                 root = open_ctree_fd(fileno(out), target, 0,
2588                                           OPEN_CTREE_PARTIAL |
2589                                           OPEN_CTREE_WRITES |
2590                                           OPEN_CTREE_NO_DEVICES);
2591                 if (!root) {
2592                         fprintf(stderr, "unable to open %s\n", target);
2593                         ret = -EIO;
2594                         goto out;
2595                 }
2596                 info = root->fs_info;
2597
2598                 if (stat(target, &st)) {
2599                         fprintf(stderr, "statting %s failed\n", target);
2600                         close_ctree(info->chunk_root);
2601                         free(cluster);
2602                         return 1;
2603                 }
2604
2605                 ret = fixup_devices(info, &mdrestore, st.st_size);
2606                 close_ctree(info->chunk_root);
2607                 if (ret)
2608                         goto out;
2609         }
2610 out:
2611         mdrestore_destroy(&mdrestore, num_threads);
2612 failed_cluster:
2613         free(cluster);
2614 failed_info:
2615         if (fixup_offset && info)
2616                 close_ctree(info->chunk_root);
2617 failed_open:
2618         if (in != stdin)
2619                 fclose(in);
2620         return ret;
2621 }
2622
2623 static int update_disk_super_on_device(struct btrfs_fs_info *info,
2624                                        const char *other_dev, u64 cur_devid)
2625 {
2626         struct btrfs_key key;
2627         struct extent_buffer *leaf;
2628         struct btrfs_path path;
2629         struct btrfs_dev_item *dev_item;
2630         struct btrfs_super_block *disk_super;
2631         char dev_uuid[BTRFS_UUID_SIZE];
2632         char fs_uuid[BTRFS_UUID_SIZE];
2633         u64 devid, type, io_align, io_width;
2634         u64 sector_size, total_bytes, bytes_used;
2635         char buf[BTRFS_SUPER_INFO_SIZE];
2636         int fp = -1;
2637         int ret;
2638
2639         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2640         key.type = BTRFS_DEV_ITEM_KEY;
2641         key.offset = cur_devid;
2642
2643         btrfs_init_path(&path);
2644         ret = btrfs_search_slot(NULL, info->chunk_root, &key, &path, 0, 0); 
2645         if (ret) {
2646                 fprintf(stderr, "ERROR: search key failed\n");
2647                 ret = -EIO;
2648                 goto out;
2649         }
2650
2651         leaf = path.nodes[0];
2652         dev_item = btrfs_item_ptr(leaf, path.slots[0],
2653                                   struct btrfs_dev_item);
2654
2655         devid = btrfs_device_id(leaf, dev_item);
2656         if (devid != cur_devid) {
2657                 printk("ERROR: devid %llu mismatch with %llu\n", devid, cur_devid);
2658                 ret = -EIO;
2659                 goto out;
2660         }
2661
2662         type = btrfs_device_type(leaf, dev_item);
2663         io_align = btrfs_device_io_align(leaf, dev_item);
2664         io_width = btrfs_device_io_width(leaf, dev_item);
2665         sector_size = btrfs_device_sector_size(leaf, dev_item);
2666         total_bytes = btrfs_device_total_bytes(leaf, dev_item);
2667         bytes_used = btrfs_device_bytes_used(leaf, dev_item);
2668         read_extent_buffer(leaf, dev_uuid, (unsigned long)btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE);
2669         read_extent_buffer(leaf, fs_uuid, (unsigned long)btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE);
2670
2671         btrfs_release_path(&path);
2672
2673         printk("update disk super on %s devid=%llu\n", other_dev, devid);
2674
2675         /* update other devices' super block */
2676         fp = open(other_dev, O_CREAT | O_RDWR, 0600);
2677         if (fp < 0) {
2678                 fprintf(stderr, "ERROR: could not open %s\n", other_dev);
2679                 ret = -EIO;
2680                 goto out;
2681         }
2682
2683         memcpy(buf, info->super_copy, BTRFS_SUPER_INFO_SIZE);
2684
2685         disk_super = (struct btrfs_super_block *)buf;
2686         dev_item = &disk_super->dev_item;
2687
2688         btrfs_set_stack_device_type(dev_item, type);
2689         btrfs_set_stack_device_id(dev_item, devid);
2690         btrfs_set_stack_device_total_bytes(dev_item, total_bytes);
2691         btrfs_set_stack_device_bytes_used(dev_item, bytes_used);
2692         btrfs_set_stack_device_io_align(dev_item, io_align);
2693         btrfs_set_stack_device_io_width(dev_item, io_width);
2694         btrfs_set_stack_device_sector_size(dev_item, sector_size);
2695         memcpy(dev_item->uuid, dev_uuid, BTRFS_UUID_SIZE);
2696         memcpy(dev_item->fsid, fs_uuid, BTRFS_UUID_SIZE);
2697         csum_block((u8 *)buf, BTRFS_SUPER_INFO_SIZE);
2698
2699         ret = pwrite64(fp, buf, BTRFS_SUPER_INFO_SIZE, BTRFS_SUPER_INFO_OFFSET);
2700         if (ret != BTRFS_SUPER_INFO_SIZE) {
2701                 if (ret < 0)
2702                         fprintf(stderr, "ERROR: cannot write superblock: %s\n", strerror(ret));
2703                 else
2704                         fprintf(stderr, "ERROR: cannot write superblock\n");
2705                 ret = -EIO;
2706                 goto out;
2707         }
2708
2709         write_backup_supers(fp, (u8 *)buf);
2710
2711 out:
2712         if (fp != -1)
2713                 close(fp);
2714         return ret;
2715 }
2716
/*
 * Print the command line help to stderr and terminate the process with
 * exit status @ret.  Never returns.
 */
static void print_usage(int ret)
{
	static const char * const usage_lines[] = {
		"usage: btrfs-image [options] source target\n",
		"\t-r      \trestore metadump image\n",
		"\t-c value\tcompression level (0 ~ 9)\n",
		"\t-t value\tnumber of threads (1 ~ 32)\n",
		"\t-o      \tdon't mess with the chunk tree when restoring\n",
		"\t-s      \tsanitize file names, use once to just use garbage, use twice if you want crc collisions\n",
		"\t-w      \twalk all trees instead of using extent tree, do this if your extent tree is broken\n",
		"\t-m      \trestore for multiple devices\n",
		"\n",
		"\tIn the dump mode, source is the btrfs device and target is the output file (use '-' for stdout).\n",
		"\tIn the restore mode, source is the dumped image and target is the btrfs device/file.\n",
	};
	size_t i;

	for (i = 0; i < sizeof(usage_lines) / sizeof(usage_lines[0]); i++)
		fputs(usage_lines[i], stderr);

	exit(ret);
}
2732
2733 int main(int argc, char *argv[])
2734 {
2735         char *source;
2736         char *target;
2737         u64 num_threads = 0;
2738         u64 compress_level = 0;
2739         int create = 1;
2740         int old_restore = 0;
2741         int walk_trees = 0;
2742         int multi_devices = 0;
2743         int ret;
2744         int sanitize = 0;
2745         int dev_cnt = 0;
2746         int usage_error = 0;
2747         FILE *out;
2748
2749         while (1) {
2750                 static const struct option long_options[] = {
2751                         { "help", no_argument, NULL, GETOPT_VAL_HELP},
2752                         { NULL, 0, NULL, 0 }
2753                 };
2754                 int c = getopt_long(argc, argv, "rc:t:oswm", long_options, NULL);
2755                 if (c < 0)
2756                         break;
2757                 switch (c) {
2758                 case 'r':
2759                         create = 0;
2760                         break;
2761                 case 't':
2762                         num_threads = arg_strtou64(optarg);
2763                         if (num_threads > 32)
2764                                 print_usage(1);
2765                         break;
2766                 case 'c':
2767                         compress_level = arg_strtou64(optarg);
2768                         if (compress_level > 9)
2769                                 print_usage(1);
2770                         break;
2771                 case 'o':
2772                         old_restore = 1;
2773                         break;
2774                 case 's':
2775                         sanitize++;
2776                         break;
2777                 case 'w':
2778                         walk_trees = 1;
2779                         break;
2780                 case 'm':
2781                         create = 0;
2782                         multi_devices = 1;
2783                         break;
2784                         case GETOPT_VAL_HELP:
2785                 default:
2786                         print_usage(c != GETOPT_VAL_HELP);
2787                 }
2788         }
2789
2790         set_argv0(argv);
2791         if (check_argc_min(argc - optind, 2))
2792                 print_usage(1);
2793
2794         dev_cnt = argc - optind - 1;
2795
2796         if (create) {
2797                 if (old_restore) {
2798                         fprintf(stderr, "Usage error: create and restore cannot be used at the same time\n");
2799                         usage_error++;
2800                 }
2801         } else {
2802                 if (walk_trees || sanitize || compress_level) {
2803                         fprintf(stderr, "Usage error: use -w, -s, -c options for restore makes no sense\n");
2804                         usage_error++;
2805                 }
2806                 if (multi_devices && dev_cnt < 2) {
2807                         fprintf(stderr, "Usage error: not enough devices specified for -m option\n");
2808                         usage_error++;
2809                 }
2810                 if (!multi_devices && dev_cnt != 1) {
2811                         fprintf(stderr, "Usage error: accepts only 1 device without -m option\n");
2812                         usage_error++;
2813                 }
2814         }
2815
2816         if (usage_error)
2817                 print_usage(1);
2818
2819         source = argv[optind];
2820         target = argv[optind + 1];
2821
2822         if (create && !strcmp(target, "-")) {
2823                 out = stdout;
2824         } else {
2825                 out = fopen(target, "w+");
2826                 if (!out) {
2827                         perror("unable to create target file");
2828                         exit(1);
2829                 }
2830         }
2831
2832         if (compress_level > 0 || create == 0) {
2833                 if (num_threads == 0) {
2834                         long tmp = sysconf(_SC_NPROCESSORS_ONLN);
2835
2836                         if (tmp <= 0)
2837                                 tmp = 1;
2838                         num_threads = tmp;
2839                 }
2840         } else {
2841                 num_threads = 0;
2842         }
2843
2844         if (create) {
2845                 ret = check_mounted(source);
2846                 if (ret < 0) {
2847                         fprintf(stderr, "Could not check mount status: %s\n",
2848                                 strerror(-ret));
2849                         exit(1);
2850                 } else if (ret)
2851                         fprintf(stderr,
2852                 "WARNING: The device is mounted. Make sure the filesystem is quiescent.\n");
2853
2854                 ret = create_metadump(source, out, num_threads,
2855                                       compress_level, sanitize, walk_trees);
2856         } else {
2857                 ret = restore_metadump(source, out, old_restore, num_threads,
2858                                        0, target, multi_devices);
2859         }
2860         if (ret) {
2861                 printk("%s failed (%s)\n", (create) ? "create" : "restore",
2862                        strerror(errno));
2863                 goto out;
2864         }
2865
2866          /* extended support for multiple devices */
2867         if (!create && multi_devices) {
2868                 struct btrfs_fs_info *info;
2869                 u64 total_devs;
2870                 int i;
2871
2872                 info = open_ctree_fs_info(target, 0, 0, 0,
2873                                           OPEN_CTREE_PARTIAL |
2874                                           OPEN_CTREE_RESTORE);
2875                 if (!info) {
2876                         fprintf(stderr, "unable to open %s error = %s\n",
2877                                 target, strerror(errno));
2878                         return 1;
2879                 }
2880
2881                 total_devs = btrfs_super_num_devices(info->super_copy);
2882                 if (total_devs != dev_cnt) {
2883                         printk("it needs %llu devices but has only %d\n",
2884                                 total_devs, dev_cnt);
2885                         close_ctree(info->chunk_root);
2886                         goto out;
2887                 }
2888
2889                 /* update super block on other disks */
2890                 for (i = 2; i <= dev_cnt; i++) {
2891                         ret = update_disk_super_on_device(info,
2892                                         argv[optind + i], (u64)i);
2893                         if (ret) {
2894                                 printk("update disk super failed devid=%d (error=%d)\n",
2895                                         i, ret);
2896                                 close_ctree(info->chunk_root);
2897                                 exit(1);
2898                         }
2899                 }
2900
2901                 close_ctree(info->chunk_root);
2902
2903                 /* fix metadata block to map correct chunk */
2904                 ret = restore_metadump(source, out, 0, num_threads, 1,
2905                                        target, 1);
2906                 if (ret) {
2907                         fprintf(stderr, "fix metadump failed (error=%d)\n",
2908                                 ret);
2909                         exit(1);
2910                 }
2911         }
2912 out:
2913         if (out == stdout) {
2914                 fflush(out);
2915         } else {
2916                 fclose(out);
2917                 if (ret && create) {
2918                         int unlink_ret;
2919
2920                         unlink_ret = unlink(target);
2921                         if (unlink_ret)
2922                                 fprintf(stderr,
2923                                         "unlink output file failed : %s\n",
2924                                         strerror(errno));
2925                 }
2926         }
2927
2928         btrfs_close_all_devices();
2929
2930         return !!ret;
2931 }