/*
 * Copyright (C) 2008 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <dirent.h>
#include <zlib.h>
#include <getopt.h>

#include "kerncompat.h"
#include "crc32c.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "utils.h"
#include "volumes.h"
#include "extent_io.h"
#include "help.h"

#define HEADER_MAGIC            0xbd5c25e27295668bULL
#define MAX_PENDING_SIZE        (256 * 1024)
#define BLOCK_SIZE              1024
#define BLOCK_MASK              (BLOCK_SIZE - 1)

#define COMPRESS_NONE           0
#define COMPRESS_ZLIB           1

#define MAX_WORKER_THREADS      (32)

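/*
 * Layout of the image stream (see write_buffers() below): a sequence of
 * BLOCK_SIZE-aligned clusters, each starting with a meta_cluster block
 * (header plus index items) and followed by the item payloads,
 * zero-padded up to the next BLOCK_SIZE boundary. header.bytenr records
 * the cluster's own position in the stream.
 */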
struct meta_cluster_item {
        __le64 bytenr;
        __le32 size;
} __attribute__ ((__packed__));

struct meta_cluster_header {
        __le64 magic;
        __le64 bytenr;
        __le32 nritems;
        u8 compress;
} __attribute__ ((__packed__));

/* cluster header + index items + buffers */
struct meta_cluster {
        struct meta_cluster_header header;
        struct meta_cluster_item items[];
} __attribute__ ((__packed__));

#define ITEMS_PER_CLUSTER ((BLOCK_SIZE - sizeof(struct meta_cluster)) / \
                           sizeof(struct meta_cluster_item))
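/*
 * Worked example: the packed header is 8 + 8 + 4 + 1 = 21 bytes and each
 * packed index item is 8 + 4 = 12 bytes, so with a BLOCK_SIZE of 1024
 * this evaluates to (1024 - 21) / 12 = 83 items per cluster.
 */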

struct fs_chunk {
        u64 logical;
        u64 physical;
        /*
         * physical_dup only stores the additional physical offset for
         * BTRFS_BLOCK_GROUP_DUP; currently restore only supports single
         * and DUP.
         * TODO: modify this structure and its related functions to
         * support RAID*
         */
        u64 physical_dup;
        u64 bytes;
        struct rb_node l;
        struct rb_node p;
        struct list_head list;
};

struct async_work {
        struct list_head list;
        struct list_head ordered;
        u64 start;
        u64 size;
        u8 *buffer;
        size_t bufsize;
        int error;
};

struct metadump_struct {
        struct btrfs_root *root;
        FILE *out;

        union {
                struct meta_cluster cluster;
                char meta_cluster_bytes[BLOCK_SIZE];
        };

        pthread_t threads[MAX_WORKER_THREADS];
        size_t num_threads;
        pthread_mutex_t mutex;
        pthread_cond_t cond;
        struct rb_root name_tree;

        struct list_head list;
        struct list_head ordered;
        size_t num_items;
        size_t num_ready;

        u64 pending_start;
        u64 pending_size;

        int compress_level;
        int done;
        int data;
        int sanitize_names;

        int error;
};

struct name {
        struct rb_node n;
        char *val;
        char *sub;
        u32 len;
};

struct mdrestore_struct {
        FILE *in;
        FILE *out;

        pthread_t threads[MAX_WORKER_THREADS];
        size_t num_threads;
        pthread_mutex_t mutex;
        pthread_cond_t cond;

        struct rb_root chunk_tree;
        struct rb_root physical_tree;
        struct list_head list;
        struct list_head overlapping_chunks;
        size_t num_items;
        u32 nodesize;
        u64 devid;
        u64 alloced_chunks;
        u64 last_physical_offset;
        u8 uuid[BTRFS_UUID_SIZE];
        u8 fsid[BTRFS_FSID_SIZE];

        int compress_method;
        int done;
        int error;
        int old_restore;
        int fixup_offset;
        int multi_devices;
        int clear_space_cache;
        struct btrfs_fs_info *info;
};

static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
                                   u64 search, u64 cluster_bytenr);
static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size);

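/*
 * Recompute the checksum of a metadata block: crc32c over everything past
 * the csum area, stored back into the first bytes of the block.
 */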
static void csum_block(u8 *buf, size_t len)
{
        u8 result[BTRFS_CRC32_SIZE];
        u32 crc = ~(u32)0;
        crc = crc32c(crc, buf + BTRFS_CSUM_SIZE, len - BTRFS_CSUM_SIZE);
        btrfs_csum_final(crc, result);
        memcpy(buf, result, BTRFS_CRC32_SIZE);
}

static int has_name(struct btrfs_key *key)
{
        switch (key->type) {
        case BTRFS_DIR_ITEM_KEY:
        case BTRFS_DIR_INDEX_KEY:
        case BTRFS_INODE_REF_KEY:
        case BTRFS_INODE_EXTREF_KEY:
        case BTRFS_XATTR_ITEM_KEY:
                return 1;
        default:
                break;
        }

        return 0;
}

static char *generate_garbage(u32 name_len)
{
        char *buf = malloc(name_len);
        int i;

        if (!buf)
                return NULL;

        for (i = 0; i < name_len; i++) {
                char c = rand_range(94) + 33;

                if (c == '/')
                        c++;
                buf[i] = c;
        }

        return buf;
}

static int name_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
{
        struct name *entry = rb_entry(a, struct name, n);
        struct name *ins = rb_entry(b, struct name, n);
        u32 len;

        len = min(ins->len, entry->len);
        return memcmp(ins->val, entry->val, len);
}

static int chunk_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
{
        struct fs_chunk *entry = rb_entry(a, struct fs_chunk, l);
        struct fs_chunk *ins = rb_entry(b, struct fs_chunk, l);

        if (fuzz && ins->logical >= entry->logical &&
            ins->logical < entry->logical + entry->bytes)
                return 0;

        if (ins->logical < entry->logical)
                return -1;
        else if (ins->logical > entry->logical)
                return 1;
        return 0;
}

static int physical_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
{
        struct fs_chunk *entry = rb_entry(a, struct fs_chunk, p);
        struct fs_chunk *ins = rb_entry(b, struct fs_chunk, p);

        if (fuzz && ins->physical >= entry->physical &&
            ins->physical < entry->physical + entry->bytes)
                return 0;

        if (fuzz && entry->physical >= ins->physical &&
            entry->physical < ins->physical + ins->bytes)
                return 0;

        if (ins->physical < entry->physical)
                return -1;
        else if (ins->physical > entry->physical)
                return 1;
        return 0;
}

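/*
 * Generic rb-tree helpers. tree_insert passes fuzz = 1 to the cmp
 * callback, so inserting an entry that overlaps or compares equal to an
 * existing one is treated as a bug; tree_search forwards the caller's
 * fuzz flag so lookups can match any entry whose range covers the key.
 */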
static void tree_insert(struct rb_root *root, struct rb_node *ins,
                        int (*cmp)(struct rb_node *a, struct rb_node *b,
                                   int fuzz))
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        int dir;

        while (*p) {
                parent = *p;

                dir = cmp(*p, ins, 1);
                if (dir < 0)
                        p = &(*p)->rb_left;
                else if (dir > 0)
                        p = &(*p)->rb_right;
                else
                        BUG();
        }

        rb_link_node(ins, parent, p);
        rb_insert_color(ins, root);
}

static struct rb_node *tree_search(struct rb_root *root,
                                   struct rb_node *search,
                                   int (*cmp)(struct rb_node *a,
                                              struct rb_node *b, int fuzz),
                                   int fuzz)
{
        struct rb_node *n = root->rb_node;
        int dir;

        while (n) {
                dir = cmp(n, search, fuzz);
                if (dir < 0)
                        n = n->rb_left;
                else if (dir > 0)
                        n = n->rb_right;
                else
                        return n;
        }

        return NULL;
}

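/*
 * Map a logical address to a physical offset via the restored chunk tree,
 * clamping *size so the caller never reads past the end of the chunk.
 */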
static u64 logical_to_physical(struct mdrestore_struct *mdres, u64 logical,
                               u64 *size, u64 *physical_dup)
{
        struct fs_chunk *fs_chunk;
        struct rb_node *entry;
        struct fs_chunk search;
        u64 offset;

        if (logical == BTRFS_SUPER_INFO_OFFSET)
                return logical;

        search.logical = logical;
        entry = tree_search(&mdres->chunk_tree, &search.l, chunk_cmp, 1);
        if (!entry) {
                if (mdres->in != stdin)
                        warning("cannot find a chunk, using logical");
                return logical;
        }
        fs_chunk = rb_entry(entry, struct fs_chunk, l);
        if (fs_chunk->logical > logical || fs_chunk->logical + fs_chunk->bytes < logical)
                BUG();
        offset = search.logical - fs_chunk->logical;

        if (physical_dup) {
                /* Only in the DUP case is physical_dup non-zero */
                if (fs_chunk->physical_dup)
                        *physical_dup = fs_chunk->physical_dup + offset;
                else
                        *physical_dup = 0;
        }

        *size = min(*size, fs_chunk->bytes + fs_chunk->logical - logical);
        return fs_chunk->physical + offset;
}

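/*
 * Find (or build) a sanitized substitute that has the same crc32c as the
 * original name, so name-hash based lookups still resolve after
 * sanitizing. Results are cached in md->name_tree so a given name always
 * maps to the same substitute.
 */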
static char *find_collision(struct metadump_struct *md, char *name,
                            u32 name_len)
{
        struct name *val;
        struct rb_node *entry;
        struct name tmp;
        unsigned long checksum;
        int found = 0;
        int i;

        tmp.val = name;
        tmp.len = name_len;
        entry = tree_search(&md->name_tree, &tmp.n, name_cmp, 0);
        if (entry) {
                val = rb_entry(entry, struct name, n);
                free(name);
                return val->sub;
        }

        val = malloc(sizeof(struct name));
        if (!val) {
                error("cannot sanitize name, not enough memory");
                free(name);
                return NULL;
        }

        memset(val, 0, sizeof(*val));

        val->val = name;
        val->len = name_len;
        val->sub = malloc(name_len);
        if (!val->sub) {
                error("cannot sanitize name, not enough memory");
                free(val);
                free(name);
                return NULL;
        }

        checksum = crc32c(~1, val->val, name_len);
        memset(val->sub, ' ', name_len);
        i = 0;
        while (1) {
                if (crc32c(~1, val->sub, name_len) == checksum &&
                    memcmp(val->sub, val->val, val->len)) {
                        found = 1;
                        break;
                }

                if (val->sub[i] == 127) {
                        do {
                                i++;
                                if (i >= name_len)
                                        break;
                        } while (val->sub[i] == 127);

                        if (i >= name_len)
                                break;
                        val->sub[i]++;
                        if (val->sub[i] == '/')
                                val->sub[i]++;
                        memset(val->sub, ' ', i);
                        i = 0;
                        continue;
                } else {
                        val->sub[i]++;
                        if (val->sub[i] == '/')
                                val->sub[i]++;
                }
        }

        if (!found) {
                warning(
"cannot find a hash collision for '%.*s', generating garbage, it won't match indexes",
                        val->len, val->val);
                for (i = 0; i < name_len; i++) {
                        char c = rand_range(94) + 33;

                        if (c == '/')
                                c++;
                        val->sub[i] = c;
                }
        }

        tree_insert(&md->name_tree, &val->n, name_cmp);
        return val->sub;
}

static void sanitize_dir_item(struct metadump_struct *md, struct extent_buffer *eb,
                              int slot)
{
        struct btrfs_dir_item *dir_item;
        char *buf;
        char *garbage;
        unsigned long name_ptr;
        u32 total_len;
        u32 cur = 0;
        u32 this_len;
        u32 name_len;
        int free_garbage = (md->sanitize_names == 1);

        dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
        total_len = btrfs_item_size_nr(eb, slot);
        while (cur < total_len) {
                this_len = sizeof(*dir_item) +
                        btrfs_dir_name_len(eb, dir_item) +
                        btrfs_dir_data_len(eb, dir_item);
                name_ptr = (unsigned long)(dir_item + 1);
                name_len = btrfs_dir_name_len(eb, dir_item);

                if (md->sanitize_names > 1) {
                        buf = malloc(name_len);
                        if (!buf) {
                                error("cannot sanitize name, not enough memory");
                                return;
                        }
                        read_extent_buffer(eb, buf, name_ptr, name_len);
                        garbage = find_collision(md, buf, name_len);
                } else {
                        garbage = generate_garbage(name_len);
                }
                if (!garbage) {
                        error("cannot sanitize name, not enough memory");
                        return;
                }
                write_extent_buffer(eb, garbage, name_ptr, name_len);
                cur += this_len;
                dir_item = (struct btrfs_dir_item *)((char *)dir_item +
                                                     this_len);
                if (free_garbage)
                        free(garbage);
        }
}

static void sanitize_inode_ref(struct metadump_struct *md,
                               struct extent_buffer *eb, int slot, int ext)
{
        struct btrfs_inode_extref *extref;
        struct btrfs_inode_ref *ref;
        char *garbage, *buf;
        unsigned long ptr;
        unsigned long name_ptr;
        u32 item_size;
        u32 cur_offset = 0;
        int len;
        int free_garbage = (md->sanitize_names == 1);

        item_size = btrfs_item_size_nr(eb, slot);
        ptr = btrfs_item_ptr_offset(eb, slot);
        while (cur_offset < item_size) {
                if (ext) {
                        extref = (struct btrfs_inode_extref *)(ptr +
                                                               cur_offset);
                        name_ptr = (unsigned long)(&extref->name);
                        len = btrfs_inode_extref_name_len(eb, extref);
                        cur_offset += sizeof(*extref);
                } else {
                        ref = (struct btrfs_inode_ref *)(ptr + cur_offset);
                        len = btrfs_inode_ref_name_len(eb, ref);
                        name_ptr = (unsigned long)(ref + 1);
                        cur_offset += sizeof(*ref);
                }
                cur_offset += len;

                if (md->sanitize_names > 1) {
                        buf = malloc(len);
                        if (!buf) {
                                error("cannot sanitize name, not enough memory");
                                return;
                        }
                        read_extent_buffer(eb, buf, name_ptr, len);
                        garbage = find_collision(md, buf, len);
                } else {
                        garbage = generate_garbage(len);
                }

                if (!garbage) {
                        error("cannot sanitize name, not enough memory");
                        return;
                }
                write_extent_buffer(eb, garbage, name_ptr, len);
                if (free_garbage)
                        free(garbage);
        }
}

static void sanitize_xattr(struct metadump_struct *md,
                           struct extent_buffer *eb, int slot)
{
        struct btrfs_dir_item *dir_item;
        unsigned long data_ptr;
        u32 data_len;

        dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
        data_len = btrfs_dir_data_len(eb, dir_item);

        data_ptr = (unsigned long)((char *)(dir_item + 1) +
                                   btrfs_dir_name_len(eb, dir_item));
        memset_extent_buffer(eb, 0, data_ptr, data_len);
}

static void sanitize_name(struct metadump_struct *md, u8 *dst,
                          struct extent_buffer *src, struct btrfs_key *key,
                          int slot)
{
        struct extent_buffer *eb;

        eb = alloc_dummy_eb(src->start, src->len);
        if (!eb) {
                error("cannot sanitize name, not enough memory");
                return;
        }

        memcpy(eb->data, src->data, src->len);

        switch (key->type) {
        case BTRFS_DIR_ITEM_KEY:
        case BTRFS_DIR_INDEX_KEY:
                sanitize_dir_item(md, eb, slot);
                break;
        case BTRFS_INODE_REF_KEY:
                sanitize_inode_ref(md, eb, slot, 0);
                break;
        case BTRFS_INODE_EXTREF_KEY:
                sanitize_inode_ref(md, eb, slot, 1);
                break;
        case BTRFS_XATTR_ITEM_KEY:
                sanitize_xattr(md, eb, slot);
                break;
        default:
                break;
        }

        memcpy(dst, eb->data, eb->len);
        free(eb);
}

/*
 * zero inline extents and csum items
 */
static void zero_items(struct metadump_struct *md, u8 *dst,
                       struct extent_buffer *src)
{
        struct btrfs_file_extent_item *fi;
        struct btrfs_item *item;
        struct btrfs_key key;
        u32 nritems = btrfs_header_nritems(src);
        size_t size;
        unsigned long ptr;
        int i, extent_type;

        for (i = 0; i < nritems; i++) {
                item = btrfs_item_nr(i);
                btrfs_item_key_to_cpu(src, &key, i);
                if (key.type == BTRFS_CSUM_ITEM_KEY) {
                        size = btrfs_item_size_nr(src, i);
                        memset(dst + btrfs_leaf_data(src) +
                               btrfs_item_offset_nr(src, i), 0, size);
                        continue;
                }

                if (md->sanitize_names && has_name(&key)) {
                        sanitize_name(md, dst, src, &key, i);
                        continue;
                }

                if (key.type != BTRFS_EXTENT_DATA_KEY)
                        continue;

                fi = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
                extent_type = btrfs_file_extent_type(src, fi);
                if (extent_type != BTRFS_FILE_EXTENT_INLINE)
                        continue;

                ptr = btrfs_file_extent_inline_start(fi);
                size = btrfs_file_extent_inline_item_len(src, item);
                memset(dst + ptr, 0, size);
        }
}

/*
 * copy buffer and zero useless data in the buffer
 */
static void copy_buffer(struct metadump_struct *md, u8 *dst,
                        struct extent_buffer *src)
{
        int level;
        size_t size;
        u32 nritems;

        memcpy(dst, src->data, src->len);
        if (src->start == BTRFS_SUPER_INFO_OFFSET)
                return;

        level = btrfs_header_level(src);
        nritems = btrfs_header_nritems(src);

        if (nritems == 0) {
                size = sizeof(struct btrfs_header);
                memset(dst + size, 0, src->len - size);
        } else if (level == 0) {
                size = btrfs_leaf_data(src) +
                        btrfs_item_offset_nr(src, nritems - 1) -
                        btrfs_item_nr_offset(nritems);
                memset(dst + btrfs_item_nr_offset(nritems), 0, size);
                zero_items(md, dst, src);
        } else {
                size = offsetof(struct btrfs_node, ptrs) +
                        sizeof(struct btrfs_key_ptr) * nritems;
                memset(dst + size, 0, src->len - size);
        }
        csum_block(dst, src->len);
}

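/*
 * Worker thread: pulls async_work items off md->list and, when
 * compression is enabled, replaces each buffer with a zlib-compressed
 * copy before signalling readiness via md->num_ready.
 */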
static void *dump_worker(void *data)
{
        struct metadump_struct *md = (struct metadump_struct *)data;
        struct async_work *async;
        int ret;

        while (1) {
                pthread_mutex_lock(&md->mutex);
                while (list_empty(&md->list)) {
                        if (md->done) {
                                pthread_mutex_unlock(&md->mutex);
                                goto out;
                        }
                        pthread_cond_wait(&md->cond, &md->mutex);
                }
                async = list_entry(md->list.next, struct async_work, list);
                list_del_init(&async->list);
                pthread_mutex_unlock(&md->mutex);

                if (md->compress_level > 0) {
                        u8 *orig = async->buffer;

                        async->bufsize = compressBound(async->size);
                        async->buffer = malloc(async->bufsize);
                        if (!async->buffer) {
                                error("not enough memory for async buffer");
                                pthread_mutex_lock(&md->mutex);
                                if (!md->error)
                                        md->error = -ENOMEM;
                                pthread_mutex_unlock(&md->mutex);
                                pthread_exit(NULL);
                        }

                        ret = compress2(async->buffer,
                                         (unsigned long *)&async->bufsize,
                                         orig, async->size, md->compress_level);

                        if (ret != Z_OK)
                                async->error = 1;

                        free(orig);
                }

                pthread_mutex_lock(&md->mutex);
                md->num_ready++;
                pthread_mutex_unlock(&md->mutex);
        }
out:
        pthread_exit(NULL);
}

static void meta_cluster_init(struct metadump_struct *md, u64 start)
{
        struct meta_cluster_header *header;

        md->num_items = 0;
        md->num_ready = 0;
        header = &md->cluster.header;
        header->magic = cpu_to_le64(HEADER_MAGIC);
        header->bytenr = cpu_to_le64(start);
        header->nritems = cpu_to_le32(0);
        header->compress = md->compress_level > 0 ?
                           COMPRESS_ZLIB : COMPRESS_NONE;
}

static void metadump_destroy(struct metadump_struct *md, int num_threads)
{
        int i;
        struct rb_node *n;

        pthread_mutex_lock(&md->mutex);
        md->done = 1;
        pthread_cond_broadcast(&md->cond);
        pthread_mutex_unlock(&md->mutex);

        for (i = 0; i < num_threads; i++)
                pthread_join(md->threads[i], NULL);

        pthread_cond_destroy(&md->cond);
        pthread_mutex_destroy(&md->mutex);

        while ((n = rb_first(&md->name_tree))) {
                struct name *name;

                name = rb_entry(n, struct name, n);
                rb_erase(n, &md->name_tree);
                free(name->val);
                free(name->sub);
                free(name);
        }
}

static int metadump_init(struct metadump_struct *md, struct btrfs_root *root,
                         FILE *out, int num_threads, int compress_level,
                         int sanitize_names)
{
        int i, ret = 0;

        memset(md, 0, sizeof(*md));
        INIT_LIST_HEAD(&md->list);
        INIT_LIST_HEAD(&md->ordered);
        md->root = root;
        md->out = out;
        md->pending_start = (u64)-1;
        md->compress_level = compress_level;
        md->sanitize_names = sanitize_names;
        if (sanitize_names > 1)
                crc32c_optimization_init();

        md->name_tree.rb_node = NULL;
        md->num_threads = num_threads;
        pthread_cond_init(&md->cond, NULL);
        pthread_mutex_init(&md->mutex, NULL);
        meta_cluster_init(md, 0);

        if (!num_threads)
                return 0;

        for (i = 0; i < num_threads; i++) {
                ret = pthread_create(md->threads + i, NULL, dump_worker, md);
                if (ret)
                        break;
        }

        if (ret)
                metadump_destroy(md, i + 1);

        return ret;
}

static int write_zero(FILE *out, size_t size)
{
        static char zero[BLOCK_SIZE];
        return fwrite(zero, size, 1, out);
}

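/*
 * Flush the current cluster: wait until the workers have processed every
 * queued item, write the BLOCK_SIZE index block followed by each buffer
 * in order, and zero-pad the tail so the next cluster starts on a
 * BLOCK_SIZE boundary. *next is set to where the next cluster begins.
 */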
static int write_buffers(struct metadump_struct *md, u64 *next)
{
        struct meta_cluster_header *header = &md->cluster.header;
        struct meta_cluster_item *item;
        struct async_work *async;
        u64 bytenr = 0;
        u32 nritems = 0;
        int ret;
        int err = 0;

        if (list_empty(&md->ordered))
                goto out;

        /* wait until all buffers are compressed */
        while (!err && md->num_items > md->num_ready) {
                struct timespec ts = {
                        .tv_sec = 0,
                        .tv_nsec = 10000000,
                };
                pthread_mutex_unlock(&md->mutex);
                nanosleep(&ts, NULL);
                pthread_mutex_lock(&md->mutex);
                err = md->error;
        }

        if (err) {
                error("one of the threads failed: %s", strerror(-err));
                goto out;
        }

        /* setup and write index block */
        list_for_each_entry(async, &md->ordered, ordered) {
                item = &md->cluster.items[nritems];
                item->bytenr = cpu_to_le64(async->start);
                item->size = cpu_to_le32(async->bufsize);
                nritems++;
        }
        header->nritems = cpu_to_le32(nritems);

        ret = fwrite(&md->cluster, BLOCK_SIZE, 1, md->out);
        if (ret != 1) {
                error("unable to write out cluster: %s", strerror(errno));
                return -errno;
        }

        /* write buffers */
        bytenr += le64_to_cpu(header->bytenr) + BLOCK_SIZE;
        while (!list_empty(&md->ordered)) {
                async = list_entry(md->ordered.next, struct async_work,
                                   ordered);
                list_del_init(&async->ordered);

                bytenr += async->bufsize;
                if (!err)
                        ret = fwrite(async->buffer, async->bufsize, 1,
                                     md->out);
                if (ret != 1) {
                        error("unable to write out cluster: %s",
                                strerror(errno));
                        err = -errno;
                        ret = 0;
                }

                free(async->buffer);
                free(async);
        }

        /* zero unused space in the last block */
        if (!err && bytenr & BLOCK_MASK) {
                size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);

                bytenr += size;
                ret = write_zero(md->out, size);
                if (ret != 1) {
                        error("unable to zero out buffer: %s",
                                strerror(errno));
                        err = -errno;
                }
        }
out:
        *next = bytenr;
        return err;
}

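/*
 * Read a data extent into async->buffer, retrying the remaining range on
 * each mirror in turn so a single bad copy does not abort the dump.
 */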
static int read_data_extent(struct metadump_struct *md,
                            struct async_work *async)
{
        struct btrfs_root *root = md->root;
        u64 bytes_left = async->size;
        u64 logical = async->start;
        u64 offset = 0;
        u64 read_len;
        int num_copies;
        int cur_mirror;
        int ret;

        num_copies = btrfs_num_copies(root->fs_info, logical, bytes_left);

        /* Try our best to read data, just like read_tree_block() */
        for (cur_mirror = 0; cur_mirror < num_copies; cur_mirror++) {
                while (bytes_left) {
                        read_len = bytes_left;
                        ret = read_extent_data(root,
                                        (char *)(async->buffer + offset),
                                        logical, &read_len, cur_mirror);
                        if (ret < 0)
                                break;
                        offset += read_len;
                        logical += read_len;
                        bytes_left -= read_len;
                }
        }
        if (bytes_left)
                return -EIO;
        return 0;
}

static int get_dev_fd(struct btrfs_root *root)
{
        struct btrfs_device *dev;

        dev = list_first_entry(&root->fs_info->fs_devices->devices,
                               struct btrfs_device, dev_list);
        return dev->fd;
}

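/*
 * Turn the pending run of extents into an async_work item: read the data
 * (or copy metadata blocks through copy_buffer()), queue it for the
 * workers, and write out the cluster once it holds ITEMS_PER_CLUSTER
 * items or 'done' is set.
 */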
static int flush_pending(struct metadump_struct *md, int done)
{
        struct async_work *async = NULL;
        struct extent_buffer *eb;
        u64 blocksize = md->root->fs_info->nodesize;
        u64 start = 0;
        u64 size;
        size_t offset;
        int ret = 0;

        if (md->pending_size) {
                async = calloc(1, sizeof(*async));
                if (!async)
                        return -ENOMEM;

                async->start = md->pending_start;
                async->size = md->pending_size;
                async->bufsize = async->size;
                async->buffer = malloc(async->bufsize);
                if (!async->buffer) {
                        free(async);
                        return -ENOMEM;
                }
                offset = 0;
                start = async->start;
                size = async->size;

                if (md->data) {
                        ret = read_data_extent(md, async);
                        if (ret) {
                                free(async->buffer);
                                free(async);
                                return ret;
                        }
                }

                /*
                 * Balance can make the mapping not cover the super block, so
                 * just copy directly from one of the devices.
                 */
                if (start == BTRFS_SUPER_INFO_OFFSET) {
                        int fd = get_dev_fd(md->root);

                        ret = pread64(fd, async->buffer, size, start);
                        if (ret < size) {
                                free(async->buffer);
                                free(async);
                                error("unable to read superblock at %llu: %s",
                                                (unsigned long long)start,
                                                strerror(errno));
                                return -errno;
                        }
                        size = 0;
                        ret = 0;
                }

                while (!md->data && size > 0) {
                        u64 this_read = min(blocksize, size);
                        eb = read_tree_block(md->root->fs_info, start,
                                             this_read, 0);
                        if (!extent_buffer_uptodate(eb)) {
                                free(async->buffer);
                                free(async);
                                error("unable to read metadata block %llu",
                                        (unsigned long long)start);
                                return -EIO;
                        }
                        copy_buffer(md, async->buffer + offset, eb);
                        free_extent_buffer(eb);
                        start += this_read;
                        offset += this_read;
                        size -= this_read;
                }

                md->pending_start = (u64)-1;
                md->pending_size = 0;
        } else if (!done) {
                return 0;
        }

        pthread_mutex_lock(&md->mutex);
        if (async) {
                list_add_tail(&async->ordered, &md->ordered);
                md->num_items++;
                if (md->compress_level > 0) {
                        list_add_tail(&async->list, &md->list);
                        pthread_cond_signal(&md->cond);
                } else {
                        md->num_ready++;
                }
        }
        if (md->num_items >= ITEMS_PER_CLUSTER || done) {
                ret = write_buffers(md, &start);
                if (ret)
                        error("unable to write buffers: %s", strerror(-ret));
                else
                        meta_cluster_init(md, start);
        }
        pthread_mutex_unlock(&md->mutex);
        return ret;
}

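/*
 * Accumulate extents into the pending run; a flush happens whenever the
 * data/metadata kind changes, the run would exceed MAX_PENDING_SIZE, or
 * the new extent is not contiguous with the run.
 */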
static int add_extent(u64 start, u64 size, struct metadump_struct *md,
                      int data)
{
        int ret;

        if (md->data != data ||
            md->pending_size + size > MAX_PENDING_SIZE ||
            md->pending_start + md->pending_size != start) {
                ret = flush_pending(md, 0);
                if (ret)
                        return ret;
                md->pending_start = start;
        }
        readahead_tree_block(md->root, start, size, 0);
        md->pending_size += size;
        md->data = data;
        return 0;
}

#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
static int is_tree_block(struct btrfs_root *extent_root,
                         struct btrfs_path *path, u64 bytenr)
{
        struct extent_buffer *leaf;
        struct btrfs_key key;
        u64 ref_objectid;
        int ret;

        leaf = path->nodes[0];
        while (1) {
                struct btrfs_extent_ref_v0 *ref_item;
                path->slots[0]++;
                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(extent_root, path);
                        if (ret < 0)
                                return ret;
                        if (ret > 0)
                                break;
                        leaf = path->nodes[0];
                }
                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                if (key.objectid != bytenr)
                        break;
                if (key.type != BTRFS_EXTENT_REF_V0_KEY)
                        continue;
                ref_item = btrfs_item_ptr(leaf, path->slots[0],
                                          struct btrfs_extent_ref_v0);
                ref_objectid = btrfs_ref_objectid_v0(leaf, ref_item);
                if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID)
                        return 1;
                break;
        }
        return 0;
}
#endif

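/*
 * Recursively queue a tree's blocks for the dump. For the root tree
 * (root_tree != 0), leaves are scanned for ROOT_ITEMs so every subtree
 * root gets walked as well.
 */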
static int copy_tree_blocks(struct btrfs_root *root, struct extent_buffer *eb,
                            struct metadump_struct *metadump, int root_tree)
{
        struct extent_buffer *tmp;
        struct btrfs_root_item *ri;
        struct btrfs_key key;
        struct btrfs_fs_info *fs_info = root->fs_info;
        u64 bytenr;
        int level;
        int nritems = 0;
        int i = 0;
        int ret;

        ret = add_extent(btrfs_header_bytenr(eb), fs_info->nodesize,
                         metadump, 0);
        if (ret) {
                error("unable to add metadata block %llu: %d",
                                btrfs_header_bytenr(eb), ret);
                return ret;
        }

        if (btrfs_header_level(eb) == 0 && !root_tree)
                return 0;

        level = btrfs_header_level(eb);
        nritems = btrfs_header_nritems(eb);
        for (i = 0; i < nritems; i++) {
                if (level == 0) {
                        btrfs_item_key_to_cpu(eb, &key, i);
                        if (key.type != BTRFS_ROOT_ITEM_KEY)
                                continue;
                        ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
                        bytenr = btrfs_disk_root_bytenr(eb, ri);
                        tmp = read_tree_block(fs_info, bytenr,
                                              fs_info->nodesize, 0);
                        if (!extent_buffer_uptodate(tmp)) {
                                error("unable to read log root block");
                                return -EIO;
                        }
                        ret = copy_tree_blocks(root, tmp, metadump, 0);
                        free_extent_buffer(tmp);
                        if (ret)
                                return ret;
                } else {
                        bytenr = btrfs_node_blockptr(eb, i);
                        tmp = read_tree_block(fs_info, bytenr,
                                              fs_info->nodesize, 0);
                        if (!extent_buffer_uptodate(tmp)) {
                                error("unable to read log root block");
                                return -EIO;
                        }
                        ret = copy_tree_blocks(root, tmp, metadump, root_tree);
                        free_extent_buffer(tmp);
                        if (ret)
                                return ret;
                }
        }

        return 0;
}

static int copy_log_trees(struct btrfs_root *root,
                          struct metadump_struct *metadump)
{
        u64 blocknr = btrfs_super_log_root(root->fs_info->super_copy);

        if (blocknr == 0)
                return 0;

        if (!root->fs_info->log_root_tree ||
            !root->fs_info->log_root_tree->node) {
                error("unable to copy tree log, it has not been setup");
                return -EIO;
        }

        return copy_tree_blocks(root, root->fs_info->log_root_tree->node,
                                metadump, 1);
}

static int copy_space_cache(struct btrfs_root *root,
                            struct metadump_struct *metadump,
                            struct btrfs_path *path)
{
        struct extent_buffer *leaf;
        struct btrfs_file_extent_item *fi;
        struct btrfs_key key;
        u64 bytenr, num_bytes;
        int ret;

        root = root->fs_info->tree_root;

        key.objectid = 0;
        key.type = BTRFS_EXTENT_DATA_KEY;
        key.offset = 0;

        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0) {
                error("free space inode not found: %d", ret);
                return ret;
        }

        leaf = path->nodes[0];

        while (1) {
                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(root, path);
                        if (ret < 0) {
                                error("cannot go to next leaf %d", ret);
                                return ret;
                        }
                        if (ret > 0)
                                break;
                        leaf = path->nodes[0];
                }

                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                if (key.type != BTRFS_EXTENT_DATA_KEY) {
                        path->slots[0]++;
                        continue;
                }

                fi = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_file_extent_item);
                if (btrfs_file_extent_type(leaf, fi) !=
                    BTRFS_FILE_EXTENT_REG) {
                        path->slots[0]++;
                        continue;
                }

                bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
                num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
                ret = add_extent(bytenr, num_bytes, metadump, 1);
                if (ret) {
                        error("unable to add space cache blocks %d", ret);
                        btrfs_release_path(path);
                        return ret;
                }
                path->slots[0]++;
        }

        return 0;
}

static int copy_from_extent_tree(struct metadump_struct *metadump,
                                 struct btrfs_path *path)
{
        struct btrfs_root *extent_root;
        struct extent_buffer *leaf;
        struct btrfs_extent_item *ei;
        struct btrfs_key key;
        u64 bytenr;
        u64 num_bytes;
        int ret;

        extent_root = metadump->root->fs_info->extent_root;
        bytenr = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = 0;

        ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
        if (ret < 0) {
                error("extent root not found: %d", ret);
                return ret;
        }
        ret = 0;

        leaf = path->nodes[0];

        while (1) {
                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(extent_root, path);
                        if (ret < 0) {
                                error("cannot go to next leaf %d", ret);
                                break;
                        }
                        if (ret > 0) {
                                ret = 0;
                                break;
                        }
                        leaf = path->nodes[0];
                }

                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                if (key.objectid < bytenr ||
                    (key.type != BTRFS_EXTENT_ITEM_KEY &&
                     key.type != BTRFS_METADATA_ITEM_KEY)) {
                        path->slots[0]++;
                        continue;
                }

                bytenr = key.objectid;
                if (key.type == BTRFS_METADATA_ITEM_KEY) {
                        num_bytes = extent_root->fs_info->nodesize;
                } else {
                        num_bytes = key.offset;
                }

                if (num_bytes == 0) {
                        error("extent length 0 at bytenr %llu key type %d",
                                        (unsigned long long)bytenr, key.type);
                        ret = -EIO;
                        break;
                }

                if (btrfs_item_size_nr(leaf, path->slots[0]) > sizeof(*ei)) {
                        ei = btrfs_item_ptr(leaf, path->slots[0],
                                            struct btrfs_extent_item);
                        if (btrfs_extent_flags(leaf, ei) &
                            BTRFS_EXTENT_FLAG_TREE_BLOCK) {
                                ret = add_extent(bytenr, num_bytes, metadump,
                                                 0);
                                if (ret) {
                                        error("unable to add block %llu: %d",
                                                (unsigned long long)bytenr, ret);
                                        break;
                                }
                        }
                } else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
                        ret = is_tree_block(extent_root, path, bytenr);
                        if (ret < 0) {
                                error("failed to check tree block %llu: %d",
                                        (unsigned long long)bytenr, ret);
                                break;
                        }

                        if (ret) {
                                ret = add_extent(bytenr, num_bytes, metadump,
                                                 0);
                                if (ret) {
                                        error("unable to add block %llu: %d",
                                                (unsigned long long)bytenr, ret);
                                        break;
                                }
                        }
                        ret = 0;
#else
                        error(
        "either extent tree is corrupted or you haven't built with V0 support");
                        ret = -EIO;
                        break;
#endif
                }
                bytenr += num_bytes;
        }

        btrfs_release_path(path);

        return ret;
}

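/*
 * Top-level dump entry point: queues the superblock, then either walks
 * the chunk and root trees directly (walk_trees) or scans the extent
 * tree for metadata, and finally appends the log trees and the free
 * space cache.
 */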
static int create_metadump(const char *input, FILE *out, int num_threads,
                           int compress_level, int sanitize, int walk_trees)
{
        struct btrfs_root *root;
        struct btrfs_path path;
        struct metadump_struct metadump;
        int ret;
        int err = 0;

        root = open_ctree(input, 0, 0);
        if (!root) {
                error("open ctree failed");
                return -EIO;
        }

        ret = metadump_init(&metadump, root, out, num_threads,
                            compress_level, sanitize);
        if (ret) {
                error("failed to initialize metadump: %d", ret);
                close_ctree(root);
                return ret;
        }

        ret = add_extent(BTRFS_SUPER_INFO_OFFSET, BTRFS_SUPER_INFO_SIZE,
                        &metadump, 0);
        if (ret) {
                error("unable to add metadata: %d", ret);
                err = ret;
                goto out;
        }

        btrfs_init_path(&path);

        if (walk_trees) {
                ret = copy_tree_blocks(root, root->fs_info->chunk_root->node,
                                       &metadump, 1);
                if (ret) {
                        err = ret;
                        goto out;
                }

                ret = copy_tree_blocks(root, root->fs_info->tree_root->node,
                                       &metadump, 1);
                if (ret) {
                        err = ret;
                        goto out;
                }
        } else {
                ret = copy_from_extent_tree(&metadump, &path);
                if (ret) {
                        err = ret;
                        goto out;
                }
        }

        ret = copy_log_trees(root, &metadump);
        if (ret) {
                err = ret;
                goto out;
        }

        ret = copy_space_cache(root, &metadump, &path);
out:
        ret = flush_pending(&metadump, 1);
        if (ret) {
                if (!err)
                        err = ret;
                error("failed to flush pending data: %d", ret);
        }

        metadump_destroy(&metadump, num_threads);

        btrfs_release_path(&path);
        ret = close_ctree(root);
        return err ? err : ret;
}

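/*
 * Rewrite the superblock for the old metadump format: replace the system
 * chunk array with a single identity-mapped SYSTEM chunk covering the
 * whole device and set BTRFS_SUPER_FLAG_METADUMP.
 */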
static void update_super_old(u8 *buffer)
{
        struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
        struct btrfs_chunk *chunk;
        struct btrfs_disk_key *key;
        u32 sectorsize = btrfs_super_sectorsize(super);
        u64 flags = btrfs_super_flags(super);

        flags |= BTRFS_SUPER_FLAG_METADUMP;
        btrfs_set_super_flags(super, flags);

        key = (struct btrfs_disk_key *)(super->sys_chunk_array);
        chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
                                       sizeof(struct btrfs_disk_key));

        btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
        btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
        btrfs_set_disk_key_offset(key, 0);

        btrfs_set_stack_chunk_length(chunk, (u64)-1);
        btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
        btrfs_set_stack_chunk_stripe_len(chunk, BTRFS_STRIPE_LEN);
        btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
        btrfs_set_stack_chunk_io_align(chunk, sectorsize);
        btrfs_set_stack_chunk_io_width(chunk, sectorsize);
        btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
        btrfs_set_stack_chunk_num_stripes(chunk, 1);
        btrfs_set_stack_chunk_sub_stripes(chunk, 0);
        chunk->stripe.devid = super->dev_item.devid;
        btrfs_set_stack_stripe_offset(&chunk->stripe, 0);
        memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
        btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
        csum_block(buffer, BTRFS_SUPER_INFO_SIZE);
}

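/*
 * Rewrite the sys_chunk_array in place for a restored image: every chunk
 * is collapsed onto a single device (keeping a second stripe only for
 * DUP), stripe offsets are remapped through logical_to_physical(), and
 * BTRFS_SUPER_FLAG_METADUMP_V2 is set.
 */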
1438 static int update_super(struct mdrestore_struct *mdres, u8 *buffer)
1439 {
1440         struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
1441         struct btrfs_chunk *chunk;
1442         struct btrfs_disk_key *disk_key;
1443         struct btrfs_key key;
1444         u64 flags = btrfs_super_flags(super);
1445         u32 new_array_size = 0;
1446         u32 array_size;
1447         u32 cur = 0;
1448         u8 *ptr, *write_ptr;
1449         int old_num_stripes;
1450
1451         write_ptr = ptr = super->sys_chunk_array;
1452         array_size = btrfs_super_sys_array_size(super);
1453
1454         while (cur < array_size) {
1455                 disk_key = (struct btrfs_disk_key *)ptr;
1456                 btrfs_disk_key_to_cpu(&key, disk_key);
1457
1458                 new_array_size += sizeof(*disk_key);
1459                 memmove(write_ptr, ptr, sizeof(*disk_key));
1460
1461                 write_ptr += sizeof(*disk_key);
1462                 ptr += sizeof(*disk_key);
1463                 cur += sizeof(*disk_key);
1464
1465                 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
1466                         u64 type, physical, physical_dup, size = 0;
1467
1468                         chunk = (struct btrfs_chunk *)ptr;
1469                         old_num_stripes = btrfs_stack_chunk_num_stripes(chunk);
1470                         chunk = (struct btrfs_chunk *)write_ptr;
1471
1472                         memmove(write_ptr, ptr, sizeof(*chunk));
1473                         btrfs_set_stack_chunk_sub_stripes(chunk, 0);
1474                         type = btrfs_stack_chunk_type(chunk);
1475                         if (type & BTRFS_BLOCK_GROUP_DUP) {
1476                                 new_array_size += sizeof(struct btrfs_stripe);
1477                                 write_ptr += sizeof(struct btrfs_stripe);
1478                         } else {
1479                                 btrfs_set_stack_chunk_num_stripes(chunk, 1);
1480                                 btrfs_set_stack_chunk_type(chunk,
1481                                                 BTRFS_BLOCK_GROUP_SYSTEM);
1482                         }
1483                         chunk->stripe.devid = super->dev_item.devid;
1484                         physical = logical_to_physical(mdres, key.offset,
1485                                                        &size, &physical_dup);
1486                         if (size != (u64)-1)
1487                                 btrfs_set_stack_stripe_offset(&chunk->stripe,
1488                                                               physical);
1489                         memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid,
1490                                BTRFS_UUID_SIZE);
1491                         new_array_size += sizeof(*chunk);
1492                 } else {
1493                         error("bogus key in the sys array %d", key.type);
1494                         return -EIO;
1495                 }
1496                 write_ptr += sizeof(*chunk);
1497                 ptr += btrfs_chunk_item_size(old_num_stripes);
1498                 cur += btrfs_chunk_item_size(old_num_stripes);
1499         }
1500
1501         if (mdres->clear_space_cache)
1502                 btrfs_set_super_cache_generation(super, 0);
1503
1504         flags |= BTRFS_SUPER_FLAG_METADUMP_V2;
1505         btrfs_set_super_flags(super, flags);
1506         btrfs_set_super_sys_array_size(super, new_array_size);
1507         btrfs_set_super_num_devices(super, 1);
1508         csum_block(buffer, BTRFS_SUPER_INFO_SIZE);
1509
1510         return 0;
1511 }
1512
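/*
 * Allocate a stand-alone extent buffer backed by its own memory, not tied
 * to any tree or cache; the caller fills eb->data and releases it with a
 * plain free().
 */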
1513 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size)
1514 {
1515         struct extent_buffer *eb;
1516
1517         eb = calloc(1, sizeof(struct extent_buffer) + size);
1518         if (!eb)
1519                 return NULL;
1520
1521         eb->start = bytenr;
1522         eb->len = size;
1523         return eb;
1524 }
1525
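/*
 * Shrink the item in the given slot of a dummy leaf to new_size, moving
 * the item data and adjusting the offsets of all following items; a
 * simplified, shrink-only version of btrfs_truncate_item().
 */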
1526 static void truncate_item(struct extent_buffer *eb, int slot, u32 new_size)
1527 {
1528         struct btrfs_item *item;
1529         u32 nritems;
1530         u32 old_size;
1531         u32 old_data_start;
1532         u32 size_diff;
1533         u32 data_end;
1534         int i;
1535
1536         old_size = btrfs_item_size_nr(eb, slot);
1537         if (old_size == new_size)
1538                 return;
1539
1540         nritems = btrfs_header_nritems(eb);
1541         data_end = btrfs_item_offset_nr(eb, nritems - 1);
1542
1543         old_data_start = btrfs_item_offset_nr(eb, slot);
1544         size_diff = old_size - new_size;
1545
1546         for (i = slot; i < nritems; i++) {
1547                 u32 ioff;
1548                 item = btrfs_item_nr(i);
1549                 ioff = btrfs_item_offset(eb, item);
1550                 btrfs_set_item_offset(eb, item, ioff + size_diff);
1551         }
1552
1553         memmove_extent_buffer(eb, btrfs_leaf_data(eb) + data_end + size_diff,
1554                               btrfs_leaf_data(eb) + data_end,
1555                               old_data_start + new_size - data_end);
1556         item = btrfs_item_nr(slot);
1557         btrfs_set_item_size(eb, item, new_size);
1558 }
1559
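/*
 * Scan the restored blocks for chunk tree leaves and rewrite each chunk
 * item in place: drop the RAID profile bits (keeping DUP), point the
 * stripes at the single restore device, remap the stripe offsets and
 * re-checksum the block.
 */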
1560 static int fixup_chunk_tree_block(struct mdrestore_struct *mdres,
1561                                   struct async_work *async, u8 *buffer,
1562                                   size_t size)
1563 {
1564         struct extent_buffer *eb;
1565         size_t size_left = size;
1566         u64 bytenr = async->start;
1567         int i;
1568
1569         if (size_left % mdres->nodesize)
1570                 return 0;
1571
1572         eb = alloc_dummy_eb(bytenr, mdres->nodesize);
1573         if (!eb)
1574                 return -ENOMEM;
1575
1576         while (size_left) {
1577                 eb->start = bytenr;
1578                 memcpy(eb->data, buffer, mdres->nodesize);
1579
1580                 if (btrfs_header_bytenr(eb) != bytenr)
1581                         break;
1582                 if (memcmp(mdres->fsid,
1583                            eb->data + offsetof(struct btrfs_header, fsid),
1584                            BTRFS_FSID_SIZE))
1585                         break;
1586
1587                 if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID)
1588                         goto next;
1589
1590                 if (btrfs_header_level(eb) != 0)
1591                         goto next;
1592
1593                 for (i = 0; i < btrfs_header_nritems(eb); i++) {
1594                         struct btrfs_chunk *chunk;
1595                         struct btrfs_key key;
1596                         u64 type, physical, physical_dup, size = (u64)-1;
1597
1598                         btrfs_item_key_to_cpu(eb, &key, i);
1599                         if (key.type != BTRFS_CHUNK_ITEM_KEY)
1600                                 continue;
1601
1602                         size = 0;
1603                         physical = logical_to_physical(mdres, key.offset,
1604                                                        &size, &physical_dup);
1605
1606                         if (!physical_dup)
1607                                 truncate_item(eb, i, sizeof(*chunk));
1608                         chunk = btrfs_item_ptr(eb, i, struct btrfs_chunk);
1609
1611                         /* Zero out the RAID profile */
1612                         type = btrfs_chunk_type(eb, chunk);
1613                         type &= (BTRFS_BLOCK_GROUP_DATA |
1614                                  BTRFS_BLOCK_GROUP_SYSTEM |
1615                                  BTRFS_BLOCK_GROUP_METADATA |
1616                                  BTRFS_BLOCK_GROUP_DUP);
1617                         btrfs_set_chunk_type(eb, chunk, type);
1618
1619                         if (!physical_dup)
1620                                 btrfs_set_chunk_num_stripes(eb, chunk, 1);
1621                         btrfs_set_chunk_sub_stripes(eb, chunk, 0);
1622                         btrfs_set_stripe_devid_nr(eb, chunk, 0, mdres->devid);
1623                         if (size != (u64)-1)
1624                                 btrfs_set_stripe_offset_nr(eb, chunk, 0,
1625                                                            physical);
1626                         /* update stripe 2 offset */
1627                         if (physical_dup)
1628                                 btrfs_set_stripe_offset_nr(eb, chunk, 1,
1629                                                            physical_dup);
1630
1631                         write_extent_buffer(eb, mdres->uuid,
1632                                         (unsigned long)btrfs_stripe_dev_uuid_nr(
1633                                                 chunk, 0),
1634                                         BTRFS_UUID_SIZE);
1635                 }
1636                 memcpy(buffer, eb->data, eb->len);
1637                 csum_block(buffer, eb->len);
1638 next:
1639                 size_left -= mdres->nodesize;
1640                 buffer += mdres->nodesize;
1641                 bytenr += mdres->nodesize;
1642         }
1643
1644         free(eb);
1645         return 0;
1646 }
1647
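/*
 * Copy the primary super block to every backup mirror location that fits
 * on the target device, adjusting the bytenr and checksum of each copy.
 */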
1648 static void write_backup_supers(int fd, u8 *buf)
1649 {
1650         struct btrfs_super_block *super = (struct btrfs_super_block *)buf;
1651         struct stat st;
1652         u64 size;
1653         u64 bytenr;
1654         int i;
1655         int ret;
1656
1657         if (fstat(fd, &st)) {
1658                 error(
1659         "cannot stat restore point, won't be able to write backup supers: %s",
1660                         strerror(errno));
1661                 return;
1662         }
1663
1664         size = btrfs_device_size(fd, &st);
1665
1666         for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1667                 bytenr = btrfs_sb_offset(i);
1668                 if (bytenr + BTRFS_SUPER_INFO_SIZE > size)
1669                         break;
1670                 btrfs_set_super_bytenr(super, bytenr);
1671                 csum_block(buf, BTRFS_SUPER_INFO_SIZE);
1672                 ret = pwrite64(fd, buf, BTRFS_SUPER_INFO_SIZE, bytenr);
1673                 if (ret < BTRFS_SUPER_INFO_SIZE) {
1674                         if (ret < 0)
1675                                 error(
1676                                 "problem writing out backup super block %d: %s",
1677                                                 i, strerror(errno));
1678                         else
1679                                 error("short write writing out backup super block");
1680                         break;
1681                 }
1682         }
1683 }
1684
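/*
 * Restore worker thread: takes queued cluster items off mdres->list,
 * decompresses them if needed, fixes up super and chunk tree blocks for
 * single-device restores, and writes the data to its (possibly remapped)
 * physical location, or via write_data_to_disk() in fixup_offset mode.
 */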
1685 static void *restore_worker(void *data)
1686 {
1687         struct mdrestore_struct *mdres = (struct mdrestore_struct *)data;
1688         struct async_work *async;
1689         size_t size;
1690         u8 *buffer;
1691         u8 *outbuf;
1692         int outfd;
1693         int ret;
1694         int compress_size = MAX_PENDING_SIZE * 4;
1695
1696         outfd = fileno(mdres->out);
1697         buffer = malloc(compress_size);
1698         if (!buffer) {
1699                 error("not enough memory for restore worker buffer");
1700                 pthread_mutex_lock(&mdres->mutex);
1701                 if (!mdres->error)
1702                         mdres->error = -ENOMEM;
1703                 pthread_mutex_unlock(&mdres->mutex);
1704                 pthread_exit(NULL);
1705         }
1706
1707         while (1) {
1708                 u64 bytenr, physical_dup;
1709                 off_t offset = 0;
1710                 int err = 0;
1711
1712                 pthread_mutex_lock(&mdres->mutex);
1713                 while (!mdres->nodesize || list_empty(&mdres->list)) {
1714                         if (mdres->done) {
1715                                 pthread_mutex_unlock(&mdres->mutex);
1716                                 goto out;
1717                         }
1718                         pthread_cond_wait(&mdres->cond, &mdres->mutex);
1719                 }
1720                 async = list_entry(mdres->list.next, struct async_work, list);
1721                 list_del_init(&async->list);
1722                 pthread_mutex_unlock(&mdres->mutex);
1723
1724                 if (mdres->compress_method == COMPRESS_ZLIB) {
1725                         size = compress_size;
1726                         ret = uncompress(buffer, (unsigned long *)&size,
1727                                          async->buffer, async->bufsize);
1728                         if (ret != Z_OK) {
1729                                 error("decompression failed with %d", ret);
1730                                 err = -EIO;
1731                         }
1732                         outbuf = buffer;
1733                 } else {
1734                         outbuf = async->buffer;
1735                         size = async->bufsize;
1736                 }
1737
1738                 if (!mdres->multi_devices) {
1739                         if (async->start == BTRFS_SUPER_INFO_OFFSET) {
1740                                 if (mdres->old_restore) {
1741                                         update_super_old(outbuf);
1742                                 } else {
1743                                         ret = update_super(mdres, outbuf);
1744                                         if (ret)
1745                                                 err = ret;
1746                                 }
1747                         } else if (!mdres->old_restore) {
1748                                 ret = fixup_chunk_tree_block(mdres, async, outbuf, size);
1749                                 if (ret)
1750                                         err = ret;
1751                         }
1752                 }
1753
1754                 if (!mdres->fixup_offset) {
1755                         while (size) {
1756                                 u64 chunk_size = size;
1757                                 physical_dup = 0;
1758                                 if (!mdres->multi_devices && !mdres->old_restore)
1759                                         bytenr = logical_to_physical(mdres,
1760                                                      async->start + offset,
1761                                                      &chunk_size,
1762                                                      &physical_dup);
1763                                 else
1764                                         bytenr = async->start + offset;
1765
1766                                 ret = pwrite64(outfd, outbuf+offset, chunk_size,
1767                                                bytenr);
1768                                 if (ret != chunk_size)
1769                                         goto error;
1770
1771                                 if (physical_dup)
1772                                         ret = pwrite64(outfd, outbuf+offset,
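                                /*
                                 * DUP chunks mirror the data to a second
                                 * stripe; without a dup, ret still holds the
                                 * size of the write above, so the re-check
                                 * below is a no-op.
                                 */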
1773                                                        chunk_size,
1774                                                        physical_dup);
1775                                 if (ret != chunk_size)
1776                                         goto error;
1777
1778                                 size -= chunk_size;
1779                                 offset += chunk_size;
1780                                 continue;
1781
1782 error:
1783                                 if (ret < 0) {
1784                                         error("unable to write to device: %s",
1785                                                         strerror(errno));
1786                                         err = errno;
1787                                 } else {
1788                                         error("short write");
1789                                         err = -EIO;
1790                                 }
1791                         }
1792                 } else if (async->start != BTRFS_SUPER_INFO_OFFSET) {
1793                         ret = write_data_to_disk(mdres->info, outbuf, async->start, size, 0);
1794                         if (ret) {
1795                                 error("failed to write data");
1796                                 exit(1);
1797                         }
1798                 }
1799
1801                 /* backup super blocks are already there at fixup_offset stage */
1802                 if (!mdres->multi_devices && async->start == BTRFS_SUPER_INFO_OFFSET)
1803                         write_backup_supers(outfd, outbuf);
1804
1805                 pthread_mutex_lock(&mdres->mutex);
1806                 if (err && !mdres->error)
1807                         mdres->error = err;
1808                 mdres->num_items--;
1809                 pthread_mutex_unlock(&mdres->mutex);
1810
1811                 free(async->buffer);
1812                 free(async);
1813         }
1814 out:
1815         free(buffer);
1816         pthread_exit(NULL);
1817 }
1818
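/*
 * Tear down the restore state: free the chunk mapping trees, wake up and
 * join the worker threads, and destroy the synchronization primitives.
 */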
1819 static void mdrestore_destroy(struct mdrestore_struct *mdres, int num_threads)
1820 {
1821         struct rb_node *n;
1822         int i;
1823
1824         while ((n = rb_first(&mdres->chunk_tree))) {
1825                 struct fs_chunk *entry;
1826
1827                 entry = rb_entry(n, struct fs_chunk, l);
1828                 rb_erase(n, &mdres->chunk_tree);
1829                 rb_erase(&entry->p, &mdres->physical_tree);
1830                 free(entry);
1831         }
1832         pthread_mutex_lock(&mdres->mutex);
1833         mdres->done = 1;
1834         pthread_cond_broadcast(&mdres->cond);
1835         pthread_mutex_unlock(&mdres->mutex);
1836
1837         for (i = 0; i < num_threads; i++)
1838                 pthread_join(mdres->threads[i], NULL);
1839
1840         pthread_cond_destroy(&mdres->cond);
1841         pthread_mutex_destroy(&mdres->mutex);
1842 }
1843
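/*
 * Set up the restore state and spawn num_threads worker threads; if a
 * thread fails to start, the ones already running are torn down again.
 */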
1844 static int mdrestore_init(struct mdrestore_struct *mdres,
1845                           FILE *in, FILE *out, int old_restore,
1846                           int num_threads, int fixup_offset,
1847                           struct btrfs_fs_info *info, int multi_devices)
1848 {
1849         int i, ret = 0;
1850
1851         memset(mdres, 0, sizeof(*mdres));
1852         pthread_cond_init(&mdres->cond, NULL);
1853         pthread_mutex_init(&mdres->mutex, NULL);
1854         INIT_LIST_HEAD(&mdres->list);
1855         INIT_LIST_HEAD(&mdres->overlapping_chunks);
1856         mdres->in = in;
1857         mdres->out = out;
1858         mdres->old_restore = old_restore;
1859         mdres->chunk_tree.rb_node = NULL;
1860         mdres->fixup_offset = fixup_offset;
1861         mdres->info = info;
1862         mdres->multi_devices = multi_devices;
1863         mdres->clear_space_cache = 0;
1864         mdres->last_physical_offset = 0;
1865         mdres->alloced_chunks = 0;
1866
1867         if (!num_threads)
1868                 return 0;
1869
1870         mdres->num_threads = num_threads;
1871         for (i = 0; i < num_threads; i++) {
1872                 ret = pthread_create(&mdres->threads[i], NULL, restore_worker,
1873                                      mdres);
1874                 if (ret) {
1875                         /* pthread_create returns errno directly */
1876                         ret = -ret;
1877                         break;
1878                 }
1879         }
1880         if (ret)
1881                 mdrestore_destroy(mdres, i);    /* join only the threads actually created */
1882         return ret;
1883 }
1884
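/*
 * Initialize nodesize, fsid, device uuid and devid from the super block
 * the first time it shows up in the stream; subsequent calls are no-ops.
 */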
1885 static int fill_mdres_info(struct mdrestore_struct *mdres,
1886                            struct async_work *async)
1887 {
1888         struct btrfs_super_block *super;
1889         u8 *buffer = NULL;
1890         u8 *outbuf;
1891         int ret;
1892
1893         /* We've already been initialized */
1894         if (mdres->nodesize)
1895                 return 0;
1896
1897         if (mdres->compress_method == COMPRESS_ZLIB) {
1898                 size_t size = MAX_PENDING_SIZE * 2;
1899
1900                 buffer = malloc(MAX_PENDING_SIZE * 2);
1901                 if (!buffer)
1902                         return -ENOMEM;
1903                 ret = uncompress(buffer, (unsigned long *)&size,
1904                                  async->buffer, async->bufsize);
1905                 if (ret != Z_OK) {
1906                         error("decompression failed with %d", ret);
1907                         free(buffer);
1908                         return -EIO;
1909                 }
1910                 outbuf = buffer;
1911         } else {
1912                 outbuf = async->buffer;
1913         }
1914
1915         super = (struct btrfs_super_block *)outbuf;
1916         mdres->nodesize = btrfs_super_nodesize(super);
1917         memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
1918         memcpy(mdres->uuid, super->dev_item.uuid,
1919                        BTRFS_UUID_SIZE);
1920         mdres->devid = le64_to_cpu(super->dev_item.devid);
1921         free(buffer);
1922         return 0;
1923 }
1924
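/*
 * Read the payload of one cluster from the stream, queue each item for
 * the worker threads and advance *next past the block-aligned end of the
 * cluster.
 */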
1925 static int add_cluster(struct meta_cluster *cluster,
1926                        struct mdrestore_struct *mdres, u64 *next)
1927 {
1928         struct meta_cluster_item *item;
1929         struct meta_cluster_header *header = &cluster->header;
1930         struct async_work *async;
1931         u64 bytenr;
1932         u32 i, nritems;
1933         int ret;
1934
1935         mdres->compress_method = header->compress;
1936
1937         bytenr = le64_to_cpu(header->bytenr) + BLOCK_SIZE;
1938         nritems = le32_to_cpu(header->nritems);
1939         for (i = 0; i < nritems; i++) {
1940                 item = &cluster->items[i];
1941                 async = calloc(1, sizeof(*async));
1942                 if (!async) {
1943                         error("not enough memory for async data");
1944                         return -ENOMEM;
1945                 }
1946                 async->start = le64_to_cpu(item->bytenr);
1947                 async->bufsize = le32_to_cpu(item->size);
1948                 async->buffer = malloc(async->bufsize);
1949                 if (!async->buffer) {
1950                         error("not enough memory for async buffer");
1951                         free(async);
1952                         return -ENOMEM;
1953                 }
1954                 ret = fread(async->buffer, async->bufsize, 1, mdres->in);
1955                 if (ret != 1) {
1956                         error("unable to read buffer: %s", strerror(errno));
1957                         free(async->buffer);
1958                         free(async);
1959                         return -EIO;
1960                 }
1961                 bytenr += async->bufsize;
1962
1963                 pthread_mutex_lock(&mdres->mutex);
1964                 if (async->start == BTRFS_SUPER_INFO_OFFSET) {
1965                         ret = fill_mdres_info(mdres, async);
1966                         if (ret) {
1967                                 error("unable to set up restore state");
1968                                 pthread_mutex_unlock(&mdres->mutex);
1969                                 free(async->buffer);
1970                                 free(async);
1971                                 return ret;
1972                         }
1973                 }
1974                 list_add_tail(&async->list, &mdres->list);
1975                 mdres->num_items++;
1976                 pthread_cond_signal(&mdres->cond);
1977                 pthread_mutex_unlock(&mdres->mutex);
1978         }
1979         if (bytenr & BLOCK_MASK) {
1980                 char buffer[BLOCK_MASK];
1981                 size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);
1982
1983                 bytenr += size;
1984                 ret = fread(buffer, size, 1, mdres->in);
1985                 if (ret != 1) {
1986                         error("failed to read buffer: %s", strerror(errno));
1987                         return -EIO;
1988                 }
1989         }
1990         *next = bytenr;
1991         return 0;
1992 }
1993
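/*
 * Poll (at 10ms intervals) until the workers have drained all queued
 * items, returning the first error any of them recorded.
 */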
1994 static int wait_for_worker(struct mdrestore_struct *mdres)
1995 {
1996         int ret = 0;
1997
1998         pthread_mutex_lock(&mdres->mutex);
1999         ret = mdres->error;
2000         while (!ret && mdres->num_items > 0) {
2001                 struct timespec ts = {
2002                         .tv_sec = 0,
2003                         .tv_nsec = 10000000,
2004                 };
2005                 pthread_mutex_unlock(&mdres->mutex);
2006                 nanosleep(&ts, NULL);
2007                 pthread_mutex_lock(&mdres->mutex);
2008                 ret = mdres->error;
2009         }
2010         pthread_mutex_unlock(&mdres->mutex);
2011         return ret;
2012 }
2013
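/*
 * Parse one chunk tree block from the image: recurse into child nodes
 * via search_for_chunk_blocks() and, for every chunk item in a leaf,
 * record its logical->physical mapping, flag physically overlapping
 * chunks and account the allocated bytes (twice for DUP).
 */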
2014 static int read_chunk_block(struct mdrestore_struct *mdres, u8 *buffer,
2015                             u64 bytenr, u64 item_bytenr, u32 bufsize,
2016                             u64 cluster_bytenr)
2017 {
2018         struct extent_buffer *eb;
2019         int ret = 0;
2020         int i;
2021
2022         eb = alloc_dummy_eb(bytenr, mdres->nodesize);
2023         if (!eb) {
2024                 ret = -ENOMEM;
2025                 goto out;
2026         }
2027
2028         while (item_bytenr != bytenr) {
2029                 buffer += mdres->nodesize;
2030                 item_bytenr += mdres->nodesize;
2031         }
2032
2033         memcpy(eb->data, buffer, mdres->nodesize);
2034         if (btrfs_header_bytenr(eb) != bytenr) {
2035                 error("eb bytenr does not match found bytenr: %llu != %llu",
2036                                 (unsigned long long)btrfs_header_bytenr(eb),
2037                                 (unsigned long long)bytenr);
2038                 ret = -EIO;
2039                 goto out;
2040         }
2041
2042         if (memcmp(mdres->fsid, eb->data + offsetof(struct btrfs_header, fsid),
2043                    BTRFS_FSID_SIZE)) {
2044                 error("filesystem UUID of eb %llu does not match",
2045                                 (unsigned long long)bytenr);
2046                 ret = -EIO;
2047                 goto out;
2048         }
2049
2050         if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID) {
2051                 error("wrong eb %llu owner %llu",
2052                                 (unsigned long long)bytenr,
2053                                 (unsigned long long)btrfs_header_owner(eb));
2054                 ret = -EIO;
2055                 goto out;
2056         }
2057
2058         for (i = 0; i < btrfs_header_nritems(eb); i++) {
2059                 struct btrfs_chunk *chunk;
2060                 struct fs_chunk *fs_chunk;
2061                 struct btrfs_key key;
2062                 u64 type;
2063
2064                 if (btrfs_header_level(eb)) {
2065                         u64 blockptr = btrfs_node_blockptr(eb, i);
2066
2067                         ret = search_for_chunk_blocks(mdres, blockptr,
2068                                                       cluster_bytenr);
2069                         if (ret)
2070                                 break;
2071                         continue;
2072                 }
2073
2074                 /* Yay a leaf!  We loves leafs! */
2075                 btrfs_item_key_to_cpu(eb, &key, i);
2076                 if (key.type != BTRFS_CHUNK_ITEM_KEY)
2077                         continue;
2078
2079                 fs_chunk = malloc(sizeof(struct fs_chunk));
2080                 if (!fs_chunk) {
2081                         error("not enough memory to allocate chunk");
2082                         ret = -ENOMEM;
2083                         break;
2084                 }
2085                 memset(fs_chunk, 0, sizeof(*fs_chunk));
2086                 chunk = btrfs_item_ptr(eb, i, struct btrfs_chunk);
2087
2088                 fs_chunk->logical = key.offset;
2089                 fs_chunk->physical = btrfs_stripe_offset_nr(eb, chunk, 0);
2090                 fs_chunk->bytes = btrfs_chunk_length(eb, chunk);
2091                 INIT_LIST_HEAD(&fs_chunk->list);
2092                 if (tree_search(&mdres->physical_tree, &fs_chunk->p,
2093                                 physical_cmp, 1) != NULL)
2094                         list_add(&fs_chunk->list, &mdres->overlapping_chunks);
2095                 else
2096                         tree_insert(&mdres->physical_tree, &fs_chunk->p,
2097                                     physical_cmp);
2098
2099                 type = btrfs_chunk_type(eb, chunk);
2100                 if (type & BTRFS_BLOCK_GROUP_DUP) {
2101                         fs_chunk->physical_dup =
2102                                         btrfs_stripe_offset_nr(eb, chunk, 1);
2103                 }
2104
2105                 if (fs_chunk->physical_dup + fs_chunk->bytes >
2106                     mdres->last_physical_offset)
2107                         mdres->last_physical_offset = fs_chunk->physical_dup +
2108                                 fs_chunk->bytes;
2109                 else if (fs_chunk->physical + fs_chunk->bytes >
2110                     mdres->last_physical_offset)
2111                         mdres->last_physical_offset = fs_chunk->physical +
2112                                 fs_chunk->bytes;
2113                 mdres->alloced_chunks += fs_chunk->bytes;
2114                 /* in the DUP case, fs_chunk->bytes is accounted twice */
2115                 if (fs_chunk->physical_dup)
2116                         mdres->alloced_chunks += fs_chunk->bytes;
2117                 tree_insert(&mdres->chunk_tree, &fs_chunk->l, chunk_cmp);
2118         }
2119 out:
2120         free(eb);
2121         return ret;
2122 }
2123
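/*
 * Linearly scan the image for the metadata block at 'search', starting
 * at cluster_bytenr and wrapping around to the start of the image; the
 * matching item is handed to read_chunk_block().
 */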
2124 /* If you have to ask you aren't worthy */
2125 static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
2126                                    u64 search, u64 cluster_bytenr)
2127 {
2128         struct meta_cluster *cluster;
2129         struct meta_cluster_header *header;
2130         struct meta_cluster_item *item;
2131         u64 current_cluster = cluster_bytenr, bytenr;
2132         u64 item_bytenr;
2133         u32 bufsize, nritems, i;
2134         u32 max_size = MAX_PENDING_SIZE * 2;
2135         u8 *buffer, *tmp = NULL;
2136         int ret = 0;
2137
2138         cluster = malloc(BLOCK_SIZE);
2139         if (!cluster) {
2140                 error("not enough memory for cluster");
2141                 return -ENOMEM;
2142         }
2143
2144         buffer = malloc(max_size);
2145         if (!buffer) {
2146                 error("not enough memory for buffer");
2147                 free(cluster);
2148                 return -ENOMEM;
2149         }
2150
2151         if (mdres->compress_method == COMPRESS_ZLIB) {
2152                 tmp = malloc(max_size);
2153                 if (!tmp) {
2154                         error("not enough memory for buffer");
2155                         free(cluster);
2156                         free(buffer);
2157                         return -ENOMEM;
2158                 }
2159         }
2160
2161         bytenr = current_cluster;
2162         while (1) {
2163                 if (fseek(mdres->in, current_cluster, SEEK_SET)) {
2164                         error("seek failed: %s", strerror(errno));
2165                         ret = -EIO;
2166                         break;
2167                 }
2168
2169                 ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
2170                 if (ret == 0) {
2171                         if (cluster_bytenr != 0) {
2172                                 cluster_bytenr = 0;
2173                                 current_cluster = 0;
2174                                 bytenr = 0;
2175                                 continue;
2176                         }
2177                         error(
2178         "unknown state after reading cluster at %llu, probably corrupted data",
2179                                         (unsigned long long)cluster_bytenr);
2180                         ret = -EIO;
2181                         break;
2182                 } else if (ret < 0) {
2183                         error("unable to read image at %llu: %s",
2184                                         (unsigned long long)cluster_bytenr,
2185                                         strerror(errno));
2186                         break;
2187                 }
2188                 ret = 0;
2189
2190                 header = &cluster->header;
2191                 if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
2192                     le64_to_cpu(header->bytenr) != current_cluster) {
2193                         error("bad header in metadump image");
2194                         ret = -EIO;
2195                         break;
2196                 }
2197
2198                 bytenr += BLOCK_SIZE;
2199                 nritems = le32_to_cpu(header->nritems);
2200                 for (i = 0; i < nritems; i++) {
2201                         size_t size;
2202
2203                         item = &cluster->items[i];
2204                         bufsize = le32_to_cpu(item->size);
2205                         item_bytenr = le64_to_cpu(item->bytenr);
2206
2207                         if (bufsize > max_size) {
2208                                 error("item %u too big: %u > %u", i, bufsize,
2209                                                 max_size);
2210                                 ret = -EIO;
2211                                 break;
2212                         }
2213
2214                         if (mdres->compress_method == COMPRESS_ZLIB) {
2215                                 ret = fread(tmp, bufsize, 1, mdres->in);
2216                                 if (ret != 1) {
2217                                         error("read error: %s", strerror(errno));
2218                                         ret = -EIO;
2219                                         break;
2220                                 }
2221
2222                                 size = max_size;
2223                                 ret = uncompress(buffer,
2224                                                  (unsigned long *)&size, tmp,
2225                                                  bufsize);
2226                                 if (ret != Z_OK) {
2227                                         error("decompression failed with %d",
2228                                                         ret);
2229                                         ret = -EIO;
2230                                         break;
2231                                 }
2232                         } else {
2233                                 ret = fread(buffer, bufsize, 1, mdres->in);
2234                                 if (ret != 1) {
2235                                         error("read error: %s",
2236                                                         strerror(errno));
2237                                         ret = -EIO;
2238                                         break;
2239                                 }
2240                                 size = bufsize;
2241                         }
2242                         ret = 0;
2243
2244                         if (item_bytenr <= search &&
2245                             item_bytenr + size > search) {
2246                                 ret = read_chunk_block(mdres, buffer, search,
2247                                                        item_bytenr, size,
2248                                                        current_cluster);
2249                                 if (!ret)
2250                                         ret = 1;
2251                                 break;
2252                         }
2253                         bytenr += bufsize;
2254                 }
2255                 if (ret) {
2256                         if (ret > 0)
2257                                 ret = 0;
2258                         break;
2259                 }
2260                 if (bytenr & BLOCK_MASK)
2261                         bytenr += BLOCK_SIZE - (bytenr & BLOCK_MASK);
2262                 current_cluster = bytenr;
2263         }
2264
2265         free(tmp);
2266         free(buffer);
2267         free(cluster);
2268         return ret;
2269 }
2270
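/*
 * Find the super block in the image, read the chunk root location from
 * it and build the in-memory chunk mapping by walking the chunk tree
 * stored in the image; skipped entirely when reading from non-seekable
 * stdin.
 */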
2271 static int build_chunk_tree(struct mdrestore_struct *mdres,
2272                             struct meta_cluster *cluster)
2273 {
2274         struct btrfs_super_block *super;
2275         struct meta_cluster_header *header;
2276         struct meta_cluster_item *item = NULL;
2277         u64 chunk_root_bytenr = 0;
2278         u32 i, nritems;
2279         u64 bytenr = 0;
2280         u8 *buffer;
2281         int ret;
2282
2283         /* We can't seek with stdin so don't bother doing this */
2284         if (mdres->in == stdin)
2285                 return 0;
2286
2287         ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
2288         if (ret <= 0) {
2289                 error("unable to read cluster: %s", strerror(errno));
2290                 return -EIO;
2291         }
2292         ret = 0;
2293
2294         header = &cluster->header;
2295         if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
2296             le64_to_cpu(header->bytenr) != 0) {
2297                 error("bad header in metadump image");
2298                 return -EIO;
2299         }
2300
2301         bytenr += BLOCK_SIZE;
2302         mdres->compress_method = header->compress;
2303         nritems = le32_to_cpu(header->nritems);
2304         for (i = 0; i < nritems; i++) {
2305                 item = &cluster->items[i];
2306
2307                 if (le64_to_cpu(item->bytenr) == BTRFS_SUPER_INFO_OFFSET)
2308                         break;
2309                 bytenr += le32_to_cpu(item->size);
2310                 if (fseek(mdres->in, le32_to_cpu(item->size), SEEK_CUR)) {
2311                         error("seek failed: %s", strerror(errno));
2312                         return -EIO;
2313                 }
2314         }
2315
2316         if (!item || le64_to_cpu(item->bytenr) != BTRFS_SUPER_INFO_OFFSET) {
2317                 error("did not find superblock at %llu",
2318                                 (unsigned long long)BTRFS_SUPER_INFO_OFFSET);
2319                 return -EINVAL;
2320         }
2321
2322         buffer = malloc(le32_to_cpu(item->size));
2323         if (!buffer) {
2324                 error("not enough memory to allocate buffer");
2325                 return -ENOMEM;
2326         }
2327
2328         ret = fread(buffer, le32_to_cpu(item->size), 1, mdres->in);
2329         if (ret != 1) {
2330                 error("unable to read buffer: %s", strerror(errno));
2331                 free(buffer);
2332                 return -EIO;
2333         }
2334
2335         if (mdres->compress_method == COMPRESS_ZLIB) {
2336                 size_t size = MAX_PENDING_SIZE * 2;
2337                 u8 *tmp;
2338
2339                 tmp = malloc(MAX_PENDING_SIZE * 2);
2340                 if (!tmp) {
2341                         free(buffer);
2342                         return -ENOMEM;
2343                 }
2344                 ret = uncompress(tmp, (unsigned long *)&size,
2345                                  buffer, le32_to_cpu(item->size));
2346                 if (ret != Z_OK) {
2347                         error("decompression failed with %d", ret);
2348                         free(buffer);
2349                         free(tmp);
2350                         return -EIO;
2351                 }
2352                 free(buffer);
2353                 buffer = tmp;
2354         }
2355
2356         pthread_mutex_lock(&mdres->mutex);
2357         super = (struct btrfs_super_block *)buffer;
2358         chunk_root_bytenr = btrfs_super_chunk_root(super);
2359         mdres->nodesize = btrfs_super_nodesize(super);
2360         memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
2361         memcpy(mdres->uuid, super->dev_item.uuid,
2362                        BTRFS_UUID_SIZE);
2363         mdres->devid = le64_to_cpu(super->dev_item.devid);
2364         free(buffer);
2365         pthread_mutex_unlock(&mdres->mutex);
2366
2367         return search_for_chunk_blocks(mdres, chunk_root_bytenr, 0);
2368 }
2369
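/* Does [physical, physical + bytes) cover any super block mirror? */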
2370 static int range_contains_super(u64 physical, u64 bytes)
2371 {
2372         u64 super_bytenr;
2373         int i;
2374
2375         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2376                 super_bytenr = btrfs_sb_offset(i);
2377                 if (super_bytenr >= physical &&
2378                     super_bytenr < physical + bytes)
2379                         return 1;
2380         }
2381
2382         return 0;
2383 }
2384
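/*
 * Move chunks whose physical ranges collided to fresh space past
 * last_physical_offset; if such a chunk covered a super block mirror,
 * the space cache may no longer be valid and is scheduled to be cleared.
 */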
2385 static void remap_overlapping_chunks(struct mdrestore_struct *mdres)
2386 {
2387         struct fs_chunk *fs_chunk;
2388
2389         while (!list_empty(&mdres->overlapping_chunks)) {
2390                 fs_chunk = list_first_entry(&mdres->overlapping_chunks,
2391                                             struct fs_chunk, list);
2392                 list_del_init(&fs_chunk->list);
2393                 if (range_contains_super(fs_chunk->physical,
2394                                          fs_chunk->bytes)) {
2395                         warning(
2396 "remapping a chunk that had a super mirror inside of it, clearing space cache so we don't end up with corruption");
2397                         mdres->clear_space_cache = 1;
2398                 }
2399                 fs_chunk->physical = mdres->last_physical_offset;
2400                 tree_insert(&mdres->physical_tree, &fs_chunk->p, physical_cmp);
2401                 mdres->last_physical_offset += fs_chunk->bytes;
2402         }
2403 }
2404
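/*
 * Make the restored filesystem consistent with a single device of the
 * given size: delete dev items of all other devices and update
 * total_bytes/bytes_used on the remaining one and in the super block.
 */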
2405 static int fixup_devices(struct btrfs_fs_info *fs_info,
2406                          struct mdrestore_struct *mdres, off_t dev_size)
2407 {
2408         struct btrfs_trans_handle *trans;
2409         struct btrfs_dev_item *dev_item;
2410         struct btrfs_path path;
2411         struct extent_buffer *leaf;
2412         struct btrfs_root *root = fs_info->chunk_root;
2413         struct btrfs_key key;
2414         u64 devid, cur_devid;
2415         int ret;
2416
2417         trans = btrfs_start_transaction(fs_info->tree_root, 1);
2418         if (IS_ERR(trans)) {
2419                 error("cannot start transaction: %ld", PTR_ERR(trans));
2420                 return PTR_ERR(trans);
2421         }
2422
2423         dev_item = &fs_info->super_copy->dev_item;
2424
2425         devid = btrfs_stack_device_id(dev_item);
2426
2427         btrfs_set_stack_device_total_bytes(dev_item, dev_size);
2428         btrfs_set_stack_device_bytes_used(dev_item, mdres->alloced_chunks);
2429
2430         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2431         key.type = BTRFS_DEV_ITEM_KEY;
2432         key.offset = 0;
2433
2434         btrfs_init_path(&path);
2435
2436 again:
2437         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
2438         if (ret < 0) {
2439                 error("search failed: %d", ret);
2440                 exit(1);
2441         }
2442
2443         while (1) {
2444                 leaf = path.nodes[0];
2445                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2446                         ret = btrfs_next_leaf(root, &path);
2447                         if (ret < 0) {
2448                                 error("cannot go to next leaf %d", ret);
2449                                 exit(1);
2450                         }
2451                         if (ret > 0) {
2452                                 ret = 0;
2453                                 break;
2454                         }
2455                         leaf = path.nodes[0];
2456                 }
2457
2458                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2459                 if (key.type > BTRFS_DEV_ITEM_KEY)
2460                         break;
2461                 if (key.type != BTRFS_DEV_ITEM_KEY) {
2462                         path.slots[0]++;
2463                         continue;
2464                 }
2465
2466                 dev_item = btrfs_item_ptr(leaf, path.slots[0],
2467                                           struct btrfs_dev_item);
2468                 cur_devid = btrfs_device_id(leaf, dev_item);
2469                 if (devid != cur_devid) {
2470                         ret = btrfs_del_item(trans, root, &path);
2471                         if (ret) {
2472                                 error("cannot delete item: %d", ret);
2473                                 exit(1);
2474                         }
2475                         btrfs_release_path(&path);
2476                         goto again;
2477                 }
2478
2479                 btrfs_set_device_total_bytes(leaf, dev_item, dev_size);
2480                 btrfs_set_device_bytes_used(leaf, dev_item,
2481                                             mdres->alloced_chunks);
2482                 btrfs_mark_buffer_dirty(leaf);
2483                 path.slots[0]++;
2484         }
2485
2486         btrfs_release_path(&path);
2487         ret = btrfs_commit_transaction(trans, fs_info->tree_root);
2488         if (ret) {
2489                 error("unable to commit transaction: %d", ret);
2490                 return ret;
2491         }
2492         return 0;
2493 }
2494
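/*
 * Top-level restore: build the chunk mapping (unless doing an old-style
 * or multi-device restore), stream the clusters from the image to the
 * worker threads and finally fix up the device items to match the size
 * of the target.
 */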
2495 static int restore_metadump(const char *input, FILE *out, int old_restore,
2496                             int num_threads, int fixup_offset,
2497                             const char *target, int multi_devices)
2498 {
2499         struct meta_cluster *cluster = NULL;
2500         struct meta_cluster_header *header;
2501         struct mdrestore_struct mdrestore;
2502         struct btrfs_fs_info *info = NULL;
2503         u64 bytenr = 0;
2504         FILE *in = NULL;
2505         int ret = 0;
2506
2507         if (!strcmp(input, "-")) {
2508                 in = stdin;
2509         } else {
2510                 in = fopen(input, "r");
2511                 if (!in) {
2512                         error("unable to open metadump image: %s",
2513                                         strerror(errno));
2514                         return 1;
2515                 }
2516         }
2517
2518         /* NOTE: open with write mode */
2519         if (fixup_offset) {
2520                 info = open_ctree_fs_info(target, 0, 0, 0,
2521                                           OPEN_CTREE_WRITES |
2522                                           OPEN_CTREE_RESTORE |
2523                                           OPEN_CTREE_PARTIAL);
2524                 if (!info) {
2525                         error("open ctree failed");
2526                         ret = -EIO;
2527                         goto failed_open;
2528                 }
2529         }
2530
2531         cluster = malloc(BLOCK_SIZE);
2532         if (!cluster) {
2533                 error("not enough memory for cluster");
2534                 ret = -ENOMEM;
2535                 goto failed_info;
2536         }
2537
2538         ret = mdrestore_init(&mdrestore, in, out, old_restore, num_threads,
2539                              fixup_offset, info, multi_devices);
2540         if (ret) {
2541                 error("failed to initialize metadata restore state: %d", ret);
2542                 goto failed_cluster;
2543         }
2544
2545         if (!multi_devices && !old_restore) {
2546                 ret = build_chunk_tree(&mdrestore, cluster);
2547                 if (ret)
2548                         goto out;
2549                 if (!list_empty(&mdrestore.overlapping_chunks))
2550                         remap_overlapping_chunks(&mdrestore);
2551         }
2552
2553         if (in != stdin && fseek(in, 0, SEEK_SET)) {
2554                 error("seek failed: %s", strerror(errno));
                ret = -EIO;
2555                 goto out;
2556         }
2557
2558         while (!mdrestore.error) {
2559                 ret = fread(cluster, BLOCK_SIZE, 1, in);
2560                 if (!ret)
2561                         break;
2562
2563                 header = &cluster->header;
2564                 if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
2565                     le64_to_cpu(header->bytenr) != bytenr) {
2566                         error("bad header in metadump image");
2567                         ret = -EIO;
2568                         break;
2569                 }
2570                 ret = add_cluster(cluster, &mdrestore, &bytenr);
2571                 if (ret) {
2572                         error("failed to add cluster: %d", ret);
2573                         break;
2574                 }
2575         }
2576         ret = wait_for_worker(&mdrestore);
2577
2578         if (!ret && !multi_devices && !old_restore) {
2579                 struct btrfs_root *root;
2580                 struct stat st;
2581
2582                 root = open_ctree_fd(fileno(out), target, 0,
2583                                           OPEN_CTREE_PARTIAL |
2584                                           OPEN_CTREE_WRITES |
2585                                           OPEN_CTREE_NO_DEVICES);
2586                 if (!root) {
2587                         error("open ctree failed in %s", target);
2588                         ret = -EIO;
2589                         goto out;
2590                 }
2591                 info = root->fs_info;
2592
2593                 if (stat(target, &st)) {
2594                         error("stat %s failed: %s", target, strerror(errno));
2595                         close_ctree(info->chunk_root);
2596                         free(cluster);
2597                         return 1;
2598                 }
2599
2600                 ret = fixup_devices(info, &mdrestore, st.st_size);
2601                 close_ctree(info->chunk_root);
2602                 if (ret)
2603                         goto out;
2604         }
2605 out:
2606         mdrestore_destroy(&mdrestore, num_threads);
2607 failed_cluster:
2608         free(cluster);
2609 failed_info:
2610         if (fixup_offset && info)
2611                 close_ctree(info->chunk_root);
2612 failed_open:
2613         if (in != stdin)
2614                 fclose(in);
2615         return ret;
2616 }
2617
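/*
 * Multi-device restore helper: write a super block to another device,
 * patched with that device's dev item fields as found by devid in the
 * chunk root.
 */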
2618 static int update_disk_super_on_device(struct btrfs_fs_info *info,
2619                                        const char *other_dev, u64 cur_devid)
2620 {
2621         struct btrfs_key key;
2622         struct extent_buffer *leaf;
2623         struct btrfs_path path;
2624         struct btrfs_dev_item *dev_item;
2625         struct btrfs_super_block *disk_super;
2626         char dev_uuid[BTRFS_UUID_SIZE];
2627         char fs_uuid[BTRFS_UUID_SIZE];
2628         u64 devid, type, io_align, io_width;
2629         u64 sector_size, total_bytes, bytes_used;
2630         char buf[BTRFS_SUPER_INFO_SIZE];
2631         int fp = -1;
2632         int ret;
2633
2634         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2635         key.type = BTRFS_DEV_ITEM_KEY;
2636         key.offset = cur_devid;
2637
2638         btrfs_init_path(&path);
2639         ret = btrfs_search_slot(NULL, info->chunk_root, &key, &path, 0, 0);
2640         if (ret) {
2641                 error("search key failed: %d", ret);
2642                 ret = -EIO;
2643                 goto out;
2644         }
2645
2646         leaf = path.nodes[0];
2647         dev_item = btrfs_item_ptr(leaf, path.slots[0],
2648                                   struct btrfs_dev_item);
2649
2650         devid = btrfs_device_id(leaf, dev_item);
2651         if (devid != cur_devid) {
2652                 error("devid mismatch: %llu != %llu",
2653                                 (unsigned long long)devid,
2654                                 (unsigned long long)cur_devid);
2655                 ret = -EIO;
2656                 goto out;
2657         }
2658
2659         type = btrfs_device_type(leaf, dev_item);
2660         io_align = btrfs_device_io_align(leaf, dev_item);
2661         io_width = btrfs_device_io_width(leaf, dev_item);
2662         sector_size = btrfs_device_sector_size(leaf, dev_item);
2663         total_bytes = btrfs_device_total_bytes(leaf, dev_item);
2664         bytes_used = btrfs_device_bytes_used(leaf, dev_item);
2665         read_extent_buffer(leaf, dev_uuid, (unsigned long)btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE);
2666         read_extent_buffer(leaf, fs_uuid, (unsigned long)btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE);
2667
2668         btrfs_release_path(&path);
2669
2670         printf("update disk super on %s devid=%llu\n", other_dev, devid);
2671
2672         /* update other devices' super block */
2673         fp = open(other_dev, O_CREAT | O_RDWR, 0600);
2674         if (fp < 0) {
2675                 error("could not open %s: %s", other_dev, strerror(errno));
2676                 ret = -EIO;
2677                 goto out;
2678         }
2679
2680         memcpy(buf, info->super_copy, BTRFS_SUPER_INFO_SIZE);
2681
2682         disk_super = (struct btrfs_super_block *)buf;
2683         dev_item = &disk_super->dev_item;
2684
2685         btrfs_set_stack_device_type(dev_item, type);
2686         btrfs_set_stack_device_id(dev_item, devid);
2687         btrfs_set_stack_device_total_bytes(dev_item, total_bytes);
2688         btrfs_set_stack_device_bytes_used(dev_item, bytes_used);
2689         btrfs_set_stack_device_io_align(dev_item, io_align);
2690         btrfs_set_stack_device_io_width(dev_item, io_width);
2691         btrfs_set_stack_device_sector_size(dev_item, sector_size);
2692         memcpy(dev_item->uuid, dev_uuid, BTRFS_UUID_SIZE);
2693         memcpy(dev_item->fsid, fs_uuid, BTRFS_UUID_SIZE);
2694         csum_block((u8 *)buf, BTRFS_SUPER_INFO_SIZE);
2695
2696         ret = pwrite64(fp, buf, BTRFS_SUPER_INFO_SIZE, BTRFS_SUPER_INFO_OFFSET);
2697         if (ret != BTRFS_SUPER_INFO_SIZE) {
2698                 if (ret < 0)
2699                         error("cannot write superblock: %s", strerror(errno));
2700                 else
2701                         error("cannot write superblock");
2702                 ret = -EIO;
2703                 goto out;
2704         }
2705
2706         write_backup_supers(fp, (u8 *)buf);
2707
2708 out:
2709         if (fp != -1)
2710                 close(fp);
2711         return ret;
2712 }
2713
2714 static void print_usage(int ret)
2715 {
2716         printf("usage: btrfs-image [options] source target\n");
2717         printf("\t-r      \trestore metadump image\n");
2718         printf("\t-c value\tcompression level (0 ~ 9)\n");
2719         printf("\t-t value\tnumber of threads (1 ~ 32)\n");
2720         printf("\t-o      \tdon't mess with the chunk tree when restoring\n");
2721         printf("\t-s      \tsanitize file names, use once to just use garbage, use twice if you want crc collisions\n");
2722         printf("\t-w      \twalk all trees instead of using extent tree, do this if your extent tree is broken\n");
2723         printf("\t-m      \trestore for multiple devices\n");
2724         printf("\n");
2725         printf("\tIn the dump mode, source is the btrfs device and target is the output file (use '-' for stdout).\n");
2726         printf("\tIn the restore mode, source is the dumped image and target is the btrfs device/file.\n");
2727         exit(ret);
2728 }
2729
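/*
 * Entry point: parse the options, then either dump a filesystem's
 * metadata into an image or restore an image onto the target device(s).
 */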
2730 int main(int argc, char *argv[])
2731 {
2732         char *source;
2733         char *target;
2734         u64 num_threads = 0;
2735         u64 compress_level = 0;
2736         int create = 1;
2737         int old_restore = 0;
2738         int walk_trees = 0;
2739         int multi_devices = 0;
2740         int ret;
2741         int sanitize = 0;
2742         int dev_cnt = 0;
2743         int usage_error = 0;
2744         FILE *out;
2745
2746         while (1) {
2747                 static const struct option long_options[] = {
2748                         { "help", no_argument, NULL, GETOPT_VAL_HELP},
2749                         { NULL, 0, NULL, 0 }
2750                 };
2751                 int c = getopt_long(argc, argv, "rc:t:oswm", long_options, NULL);
2752                 if (c < 0)
2753                         break;
2754                 switch (c) {
2755                 case 'r':
2756                         create = 0;
2757                         break;
2758                 case 't':
2759                         num_threads = arg_strtou64(optarg);
2760                         if (num_threads > MAX_WORKER_THREADS) {
2761                                 error("number of threads out of range: %llu > %d",
2762                                         (unsigned long long)num_threads,
2763                                         MAX_WORKER_THREADS);
2764                                 return 1;
2765                         }
2766                         break;
2767                 case 'c':
2768                         compress_level = arg_strtou64(optarg);
2769                         if (compress_level > 9) {
2770                                 error("compression level out of range: %llu",
2771                                         (unsigned long long)compress_level);
2772                                 return 1;
2773                         }
2774                         break;
2775                 case 'o':
2776                         old_restore = 1;
2777                         break;
2778                 case 's':
2779                         sanitize++;
2780                         break;
2781                 case 'w':
2782                         walk_trees = 1;
2783                         break;
2784                 case 'm':
2785                         create = 0;
2786                         multi_devices = 1;
2787                         break;
2788                 case GETOPT_VAL_HELP:
2789                 default:
2790                         print_usage(c != GETOPT_VAL_HELP);
2791                 }
2792         }
2793
2794         set_argv0(argv);
2795         if (check_argc_min(argc - optind, 2))
2796                 print_usage(1);
2797
2798         dev_cnt = argc - optind - 1;
2799
2800         if (create) {
2801                 if (old_restore) {
2802                         error(
2803                         "create and restore cannot be used at the same time");
2804                         usage_error++;
2805                 }
2806         } else {
2807                 if (walk_trees || sanitize || compress_level) {
2808                         error(
2809                         "using -w, -s, -c options for restore makes no sense");
2810                         usage_error++;
2811                 }
2812                 if (multi_devices && dev_cnt < 2) {
2813                         error("not enough devices specified for -m option");
2814                         usage_error++;
2815                 }
2816                 if (!multi_devices && dev_cnt != 1) {
2817                         error("accepts only 1 device without -m option");
2818                         usage_error++;
2819                 }
2820         }
2821
2822         if (usage_error)
2823                 print_usage(1);
2824
2825         source = argv[optind];
2826         target = argv[optind + 1];
2827
2828         if (create && !strcmp(target, "-")) {
2829                 out = stdout;
2830         } else {
2831                 out = fopen(target, "w+");
2832                 if (!out) {
2833                         error("unable to create target file %s", target);
2834                         exit(1);
2835                 }
2836         }
2837
2838         if (compress_level > 0 || create == 0) {
2839                 if (num_threads == 0) {
2840                         long tmp = sysconf(_SC_NPROCESSORS_ONLN);
2841
2842                         if (tmp <= 0)
2843                                 tmp = 1;
2844                         num_threads = tmp;
2845                 }
2846         } else {
2847                 num_threads = 0;
2848         }
2849
2850         if (create) {
2851                 ret = check_mounted(source);
2852                 if (ret < 0) {
2853                         warning("unable to check mount status of: %s",
2854                                         strerror(-ret));
2855                 } else if (ret) {
2856                         warning("%s already mounted, results may be inaccurate",
2857                                         source);
2858                 }
2859
2860                 ret = create_metadump(source, out, num_threads,
2861                                       compress_level, sanitize, walk_trees);
2862         } else {
2863                 ret = restore_metadump(source, out, old_restore, num_threads,
2864                                        0, target, multi_devices);
2865         }
2866         if (ret) {
2867                 error("%s failed: %s", (create) ? "create" : "restore",
2868                        strerror(errno));
2869                 goto out;
2870         }
2871
2872          /* extended support for multiple devices */
2873         if (!create && multi_devices) {
2874                 struct btrfs_fs_info *info;
2875                 u64 total_devs;
2876                 int i;
2877
2878                 info = open_ctree_fs_info(target, 0, 0, 0,
2879                                           OPEN_CTREE_PARTIAL |
2880                                           OPEN_CTREE_RESTORE);
2881                 if (!info) {
2882                         error("open ctree failed at %s", target);
2883                         return 1;
2884                 }
2885
2886                 total_devs = btrfs_super_num_devices(info->super_copy);
2887                 if (total_devs != dev_cnt) {
2888                         error("the image needs %llu devices but only %d were given",
2889                                 (unsigned long long)total_devs, dev_cnt);
2890                         close_ctree(info->chunk_root);
2891                         goto out;
2892                 }
2893
2894                 /* update super block on other disks */
2895                 for (i = 2; i <= dev_cnt; i++) {
2896                         ret = update_disk_super_on_device(info,
2897                                         argv[optind + i], (u64)i);
2898                         if (ret) {
2899                                 error("updating disk superblock failed for devid %d: %d",
2900                                         i, ret);
2901                                 close_ctree(info->chunk_root);
2902                                 exit(1);
2903                         }
2904                 }
2905
2906                 close_ctree(info->chunk_root);
2907
2908                 /* fix metadata block to map correct chunk */
2909                 ret = restore_metadump(source, out, 0, num_threads, 1,
2910                                        target, 1);
2911                 if (ret) {
2912                         error("unable to fixup metadump: %d", ret);
2913                         exit(1);
2914                 }
2915         }
2916 out:
2917         if (out == stdout) {
2918                 fflush(out);
2919         } else {
2920                 fclose(out);
2921                 if (ret && create) {
2922                         int unlink_ret;
2923
2924                         unlink_ret = unlink(target);
2925                         if (unlink_ret)
2926                                 error("unlink output file %s failed: %s",
2927                                                 target, strerror(errno));
2928                 }
2929         }
2930
2931         btrfs_close_all_devices();
2932
2933         return !!ret;
2934 }