2100f204781d6305492e367aaeca054f66b53bc5
[platform/upstream/btrfs-progs.git] / image / main.c
1 /*
2  * Copyright (C) 2008 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <pthread.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <fcntl.h>
25 #include <unistd.h>
26 #include <dirent.h>
27 #include <zlib.h>
28 #include <getopt.h>
29
30 #include "kerncompat.h"
31 #include "crc32c.h"
32 #include "ctree.h"
33 #include "disk-io.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "volumes.h"
37 #include "extent_io.h"
38 #include "help.h"
39
/* Magic number identifying a metadump cluster header on disk */
#define HEADER_MAGIC            0xbd5c25e27295668bULL
/* Presumably the max batched extent run before a flush — TODO confirm vs flush_pending */
#define MAX_PENDING_SIZE        (256 * 1024)
/* The image stream is laid out in BLOCK_SIZE units; clusters are one block */
#define BLOCK_SIZE              1024
#define BLOCK_MASK              (BLOCK_SIZE - 1)

/* Values for meta_cluster_header.compress */
#define COMPRESS_NONE           0
#define COMPRESS_ZLIB           1

/* Hard cap on dump/restore worker threads (sizes the threads[] arrays) */
#define MAX_WORKER_THREADS      (32)
49
/* One index entry in a cluster: origin and size of a dumped buffer */
struct meta_cluster_item {
	__le64 bytenr;	/* logical start of the dumped extent (async->start) */
	__le32 size;	/* size of the (possibly compressed) buffer that follows */
} __attribute__ ((__packed__));
54
/* On-disk header at the start of every cluster block */
struct meta_cluster_header {
	__le64 magic;	/* HEADER_MAGIC */
	__le64 bytenr;	/* stream offset this cluster starts at */
	__le32 nritems;	/* number of meta_cluster_items in use */
	u8 compress;	/* COMPRESS_NONE or COMPRESS_ZLIB */
} __attribute__ ((__packed__));
61
/* cluster header + index items + buffers */
struct meta_cluster {
	struct meta_cluster_header header;
	struct meta_cluster_item items[];
} __attribute__ ((__packed__));

/* How many index items fit in the remainder of one BLOCK_SIZE cluster block */
#define ITEMS_PER_CLUSTER ((BLOCK_SIZE - sizeof(struct meta_cluster)) / \
			   sizeof(struct meta_cluster_item))
70
/* Logical->physical mapping of one chunk, kept in mdrestore's trees */
struct fs_chunk {
	u64 logical;	/* logical start of the chunk */
	u64 physical;	/* physical start of the (first) copy */
	/*
	 * physical_dup only stores the additional physical for
	 * BTRFS_BLOCK_GROUP_DUP; currently restore only supports single
	 * and DUP.
	 * TODO: modify this structure and the functions related to this
	 * structure to support RAID*
	 */
	u64 physical_dup;
	u64 bytes;	/* length of the chunk */
	struct rb_node l;	/* node in chunk_tree, keyed by logical */
	struct rb_node p;	/* node in physical_tree, keyed by physical */
	struct list_head list;	/* link in overlapping_chunks — TODO confirm */
};
86
/* One unit of work handed to the dump worker threads */
struct async_work {
	struct list_head list;		/* link in metadump_struct.list (work queue) */
	struct list_head ordered;	/* link in metadump_struct.ordered (output order) */
	u64 start;			/* logical start of this extent run */
	u64 size;			/* uncompressed size */
	u8 *buffer;			/* data; replaced by zlib output when compressing */
	size_t bufsize;			/* current byte size of buffer */
	int error;			/* set by dump_worker when compression fails */
};
96
/* State for creating an image (dump) of a filesystem */
struct metadump_struct {
	struct btrfs_root *root;	/* fs being dumped */
	FILE *out;			/* destination image stream */

	/* current cluster; union pads it out to a full on-disk block */
	union {
		struct meta_cluster cluster;
		char meta_cluster_bytes[BLOCK_SIZE];
	};

	pthread_t threads[MAX_WORKER_THREADS];
	size_t num_threads;
	pthread_mutex_t mutex;		/* guards list/num_ready/done/error */
	pthread_cond_t cond;		/* signals workers when work is queued */
	struct rb_root name_tree;	/* sanitized-name cache (struct name) */

	struct list_head list;		/* queued async_work for workers */
	struct list_head ordered;	/* async_work in on-disk output order */
	size_t num_items;		/* items queued for this cluster */
	size_t num_ready;		/* items workers have finished */

	u64 pending_start;		/* logical start of the batched run */
	u64 pending_size;		/* bytes batched so far */

	int compress_level;		/* zlib level; > 0 enables compression */
	int done;			/* tells workers to exit */
	int data;			/* also dump data extents, not just metadata */
	int sanitize_names;		/* 0 = off, 1 = garbage, > 1 = crc collisions */

	int error;			/* first worker error (-errno style) */
};
127
/* Mapping from an original name to its sanitized substitute */
struct name {
	struct rb_node n;	/* node in metadump_struct.name_tree */
	char *val;	/* original name bytes (not NUL terminated) */
	char *sub;	/* substitute of the same length */
	u32 len;	/* length of both buffers */
};
134
/* State for restoring an image back onto a device */
struct mdrestore_struct {
	FILE *in;	/* image being read (may be stdin) */
	FILE *out;	/* target device/file */

	pthread_t threads[MAX_WORKER_THREADS];
	size_t num_threads;
	pthread_mutex_t mutex;
	pthread_cond_t cond;

	struct rb_root chunk_tree;	/* fs_chunk by logical address */
	struct rb_root physical_tree;	/* fs_chunk by physical address */
	struct list_head list;		/* work queue — TODO confirm vs restore workers */
	struct list_head overlapping_chunks;
	size_t num_items;
	u32 nodesize;
	u64 devid;
	u64 alloced_chunks;
	u64 last_physical_offset;
	u8 uuid[BTRFS_UUID_SIZE];
	u8 fsid[BTRFS_FSID_SIZE];

	int compress_method;	/* from the cluster header: COMPRESS_* */
	int done;
	int error;
	int old_restore;	/* restore images in the old (pre-chunk-tree) format */
	int fixup_offset;
	int multi_devices;
	int clear_space_cache;
	struct btrfs_fs_info *info;
};
165
166 static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
167                                    u64 search, u64 cluster_bytenr);
168 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size);
169
/*
 * Recompute the crc32c of a metadata block in place.
 *
 * The checksum covers everything after the csum area (the first
 * BTRFS_CSUM_SIZE bytes) and the finalized result is stored back at the
 * start of the buffer (BTRFS_CRC32_SIZE bytes).
 */
static void csum_block(u8 *buf, size_t len)
{
	u8 result[BTRFS_CRC32_SIZE];
	u32 crc = ~(u32)0;
	crc = crc32c(crc, buf + BTRFS_CSUM_SIZE, len - BTRFS_CSUM_SIZE);
	btrfs_csum_final(crc, result);
	memcpy(buf, result, BTRFS_CRC32_SIZE);
}
178
179 static int has_name(struct btrfs_key *key)
180 {
181         switch (key->type) {
182         case BTRFS_DIR_ITEM_KEY:
183         case BTRFS_DIR_INDEX_KEY:
184         case BTRFS_INODE_REF_KEY:
185         case BTRFS_INODE_EXTREF_KEY:
186         case BTRFS_XATTR_ITEM_KEY:
187                 return 1;
188         default:
189                 break;
190         }
191
192         return 0;
193 }
194
195 static char *generate_garbage(u32 name_len)
196 {
197         char *buf = malloc(name_len);
198         int i;
199
200         if (!buf)
201                 return NULL;
202
203         for (i = 0; i < name_len; i++) {
204                 char c = rand_range(94) + 33;
205
206                 if (c == '/')
207                         c++;
208                 buf[i] = c;
209         }
210
211         return buf;
212 }
213
214 static int name_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
215 {
216         struct name *entry = rb_entry(a, struct name, n);
217         struct name *ins = rb_entry(b, struct name, n);
218         u32 len;
219
220         len = min(ins->len, entry->len);
221         return memcmp(ins->val, entry->val, len);
222 }
223
224 static int chunk_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
225 {
226         struct fs_chunk *entry = rb_entry(a, struct fs_chunk, l);
227         struct fs_chunk *ins = rb_entry(b, struct fs_chunk, l);
228
229         if (fuzz && ins->logical >= entry->logical &&
230             ins->logical < entry->logical + entry->bytes)
231                 return 0;
232
233         if (ins->logical < entry->logical)
234                 return -1;
235         else if (ins->logical > entry->logical)
236                 return 1;
237         return 0;
238 }
239
240 static int physical_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
241 {
242         struct fs_chunk *entry = rb_entry(a, struct fs_chunk, p);
243         struct fs_chunk *ins = rb_entry(b, struct fs_chunk, p);
244
245         if (fuzz && ins->physical >= entry->physical &&
246             ins->physical < entry->physical + entry->bytes)
247                 return 0;
248
249         if (fuzz && entry->physical >= ins->physical &&
250             entry->physical < ins->physical + ins->bytes)
251                 return 0;
252
253         if (ins->physical < entry->physical)
254                 return -1;
255         else if (ins->physical > entry->physical)
256                 return 1;
257         return 0;
258 }
259
260 static void tree_insert(struct rb_root *root, struct rb_node *ins,
261                         int (*cmp)(struct rb_node *a, struct rb_node *b,
262                                    int fuzz))
263 {
264         struct rb_node ** p = &root->rb_node;
265         struct rb_node * parent = NULL;
266         int dir;
267
268         while(*p) {
269                 parent = *p;
270
271                 dir = cmp(*p, ins, 1);
272                 if (dir < 0)
273                         p = &(*p)->rb_left;
274                 else if (dir > 0)
275                         p = &(*p)->rb_right;
276                 else
277                         BUG();
278         }
279
280         rb_link_node(ins, parent, p);
281         rb_insert_color(ins, root);
282 }
283
284 static struct rb_node *tree_search(struct rb_root *root,
285                                    struct rb_node *search,
286                                    int (*cmp)(struct rb_node *a,
287                                               struct rb_node *b, int fuzz),
288                                    int fuzz)
289 {
290         struct rb_node *n = root->rb_node;
291         int dir;
292
293         while (n) {
294                 dir = cmp(n, search, fuzz);
295                 if (dir < 0)
296                         n = n->rb_left;
297                 else if (dir > 0)
298                         n = n->rb_right;
299                 else
300                         return n;
301         }
302
303         return NULL;
304 }
305
/*
 * Map @logical to the physical offset it should be written to.
 *
 * Looks up the chunk covering @logical in mdres->chunk_tree (fuzzy
 * search, so any address inside a chunk matches).  On return, *size is
 * clamped so the caller never crosses the end of the chunk, and if
 * @physical_dup is non-NULL it receives the offset in the second DUP
 * copy, or 0 when the chunk is not DUP.
 *
 * The superblock offset maps to itself.  If no chunk is found the
 * logical address is returned unchanged; the warning is suppressed when
 * reading from stdin.
 */
static u64 logical_to_physical(struct mdrestore_struct *mdres, u64 logical,
			       u64 *size, u64 *physical_dup)
{
	struct fs_chunk *fs_chunk;
	struct rb_node *entry;
	struct fs_chunk search;
	u64 offset;

	if (logical == BTRFS_SUPER_INFO_OFFSET)
		return logical;

	search.logical = logical;
	entry = tree_search(&mdres->chunk_tree, &search.l, chunk_cmp, 1);
	if (!entry) {
		if (mdres->in != stdin)
			warning("cannot find a chunk, using logical");
		return logical;
	}
	fs_chunk = rb_entry(entry, struct fs_chunk, l);
	/* The fuzzy search must have returned a chunk containing @logical */
	if (fs_chunk->logical > logical || fs_chunk->logical + fs_chunk->bytes < logical)
		BUG();
	offset = search.logical - fs_chunk->logical;

	if (physical_dup) {
		/* Only in dup case, physical_dup is not equal to 0 */
		if (fs_chunk->physical_dup)
			*physical_dup = fs_chunk->physical_dup + offset;
		else
			*physical_dup = 0;
	}

	*size = min(*size, fs_chunk->bytes + fs_chunk->logical - logical);
	return fs_chunk->physical + offset;
}
340
341
/*
 * Return a substitute for @name of the same length with the same crc32c,
 * so sanitized names still match their directory-index hashes.
 *
 * Takes ownership of @name: it is either freed (cache hit, OOM) or
 * stored in md->name_tree.  The returned buffer is owned by the tree —
 * callers must not free it (they only free results of
 * generate_garbage(), when sanitize_names == 1).  Returns NULL on OOM.
 */
static char *find_collision(struct metadump_struct *md, char *name,
			    u32 name_len)
{
	struct name *val;
	struct rb_node *entry;
	struct name tmp;
	unsigned long checksum;
	int found = 0;
	int i;

	/* Reuse an earlier substitute for this exact name, if cached */
	tmp.val = name;
	tmp.len = name_len;
	entry = tree_search(&md->name_tree, &tmp.n, name_cmp, 0);
	if (entry) {
		val = rb_entry(entry, struct name, n);
		free(name);
		return val->sub;
	}

	val = malloc(sizeof(struct name));
	if (!val) {
		error("cannot sanitize name, not enough memory");
		free(name);
		return NULL;
	}

	memset(val, 0, sizeof(*val));

	val->val = name;
	val->len = name_len;
	val->sub = malloc(name_len);
	if (!val->sub) {
		error("cannot sanitize name, not enough memory");
		free(val);
		free(name);
		return NULL;
	}

	/* Both sides use the same ~1 seed, so the comparison is consistent */
	checksum = crc32c(~1, val->val, name_len);
	memset(val->sub, ' ', name_len);
	i = 0;
	/*
	 * Odometer search: step val->sub through printable characters
	 * (' '..127, skipping '/') until its crc32c matches, or every
	 * combination is exhausted.  Position i is the currently
	 * spinning digit; when it reaches 127 the carry moves to the
	 * next position and all lower positions reset to ' '.
	 */
	while (1) {
		if (crc32c(~1, val->sub, name_len) == checksum &&
		    memcmp(val->sub, val->val, val->len)) {
			found = 1;
			break;
		}

		if (val->sub[i] == 127) {
			do {
				i++;
				if (i >= name_len)
					break;
			} while (val->sub[i] == 127);

			/* Carried past the last position: search space exhausted */
			if (i >= name_len)
				break;
			val->sub[i]++;
			if (val->sub[i] == '/')
				val->sub[i]++;
			memset(val->sub, ' ', i);
			i = 0;
			continue;
		} else {
			val->sub[i]++;
			if (val->sub[i] == '/')
				val->sub[i]++;
		}
	}

	if (!found) {
		warning(
"cannot find a hash collision for '%.*s', generating garbage, it won't match indexes",
			val->len, val->val);
		/* Fall back to random printable bytes (no '/') */
		for (i = 0; i < name_len; i++) {
			char c = rand_range(94) + 33;

			if (c == '/')
				c++;
			val->sub[i] = c;
		}
	}

	tree_insert(&md->name_tree, &val->n, name_cmp);
	return val->sub;
}
428
/*
 * Overwrite every name in a DIR_ITEM/DIR_INDEX item at @slot with a
 * sanitized replacement, in place in @eb.
 *
 * With sanitize_names > 1 the replacement is a crc32c collision from
 * find_collision() (owned by md->name_tree, not freed here); otherwise
 * it is random garbage that we free after writing it back.
 */
static void sanitize_dir_item(struct metadump_struct *md, struct extent_buffer *eb,
			      int slot)
{
	struct btrfs_dir_item *dir_item;
	char *buf;
	char *garbage;
	unsigned long name_ptr;
	u32 total_len;
	u32 cur = 0;
	u32 this_len;
	u32 name_len;
	int free_garbage = (md->sanitize_names == 1);

	dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
	total_len = btrfs_item_size_nr(eb, slot);
	/* An item may pack several dir entries; walk them all */
	while (cur < total_len) {
		this_len = sizeof(*dir_item) +
			btrfs_dir_name_len(eb, dir_item) +
			btrfs_dir_data_len(eb, dir_item);
		name_ptr = (unsigned long)(dir_item + 1);
		name_len = btrfs_dir_name_len(eb, dir_item);

		if (md->sanitize_names > 1) {
			buf = malloc(name_len);
			if (!buf) {
				error("cannot sanitize name, not enough memory");
				return;
			}
			read_extent_buffer(eb, buf, name_ptr, name_len);
			/* find_collision takes ownership of buf */
			garbage = find_collision(md, buf, name_len);
		} else {
			garbage = generate_garbage(name_len);
		}
		if (!garbage) {
			error("cannot sanitize name, not enough memory");
			return;
		}
		write_extent_buffer(eb, garbage, name_ptr, name_len);
		cur += this_len;
		dir_item = (struct btrfs_dir_item *)((char *)dir_item +
						     this_len);
		if (free_garbage)
			free(garbage);
	}
}
474
/*
 * Overwrite the names stored in an INODE_REF (@ext == 0) or
 * INODE_EXTREF (@ext != 0) item at @slot, in place in @eb.
 *
 * Replacement policy matches sanitize_dir_item(): crc collisions when
 * sanitize_names > 1 (tree-owned), otherwise random garbage freed here.
 */
static void sanitize_inode_ref(struct metadump_struct *md,
			       struct extent_buffer *eb, int slot, int ext)
{
	struct btrfs_inode_extref *extref;
	struct btrfs_inode_ref *ref;
	char *garbage, *buf;
	unsigned long ptr;
	unsigned long name_ptr;
	u32 item_size;
	u32 cur_offset = 0;
	int len;
	int free_garbage = (md->sanitize_names == 1);

	item_size = btrfs_item_size_nr(eb, slot);
	ptr = btrfs_item_ptr_offset(eb, slot);
	/* An item may pack several refs; walk them all */
	while (cur_offset < item_size) {
		if (ext) {
			extref = (struct btrfs_inode_extref *)(ptr +
							       cur_offset);
			name_ptr = (unsigned long)(&extref->name);
			len = btrfs_inode_extref_name_len(eb, extref);
			cur_offset += sizeof(*extref);
		} else {
			ref = (struct btrfs_inode_ref *)(ptr + cur_offset);
			len = btrfs_inode_ref_name_len(eb, ref);
			name_ptr = (unsigned long)(ref + 1);
			cur_offset += sizeof(*ref);
		}
		cur_offset += len;

		if (md->sanitize_names > 1) {
			buf = malloc(len);
			if (!buf) {
				error("cannot sanitize name, not enough memory");
				return;
			}
			read_extent_buffer(eb, buf, name_ptr, len);
			/* find_collision takes ownership of buf */
			garbage = find_collision(md, buf, len);
		} else {
			garbage = generate_garbage(len);
		}

		if (!garbage) {
			error("cannot sanitize name, not enough memory");
			return;
		}
		write_extent_buffer(eb, garbage, name_ptr, len);
		if (free_garbage)
			free(garbage);
	}
}
526
/*
 * Zero out the value of an XATTR_ITEM at @slot (the data that follows
 * the name inside the dir_item payload); the name itself is handled by
 * the dir-item sanitizer path.
 */
static void sanitize_xattr(struct metadump_struct *md,
			   struct extent_buffer *eb, int slot)
{
	struct btrfs_dir_item *dir_item;
	unsigned long data_ptr;
	u32 data_len;

	dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
	data_len = btrfs_dir_data_len(eb, dir_item);

	data_ptr = (unsigned long)((char *)(dir_item + 1) +
				   btrfs_dir_name_len(eb, dir_item));
	memset_extent_buffer(eb, 0, data_ptr, data_len);
}
541
/*
 * Sanitize the name-bearing item at @slot of @src and write the result
 * into @dst (the output copy of the block).
 *
 * Works on a throwaway copy of the extent buffer so the helpers can use
 * the read/write_extent_buffer API, then copies the whole sanitized
 * block back out.  On OOM, @dst is left as the unsanitized copy made by
 * copy_buffer().
 */
static void sanitize_name(struct metadump_struct *md, u8 *dst,
			  struct extent_buffer *src, struct btrfs_key *key,
			  int slot)
{
	struct extent_buffer *eb;

	eb = alloc_dummy_eb(src->start, src->len);
	if (!eb) {
		error("cannot sanitize name, not enough memory");
		return;
	}

	memcpy(eb->data, src->data, src->len);

	switch (key->type) {
	case BTRFS_DIR_ITEM_KEY:
	case BTRFS_DIR_INDEX_KEY:
		sanitize_dir_item(md, eb, slot);
		break;
	case BTRFS_INODE_REF_KEY:
		sanitize_inode_ref(md, eb, slot, 0);
		break;
	case BTRFS_INODE_EXTREF_KEY:
		sanitize_inode_ref(md, eb, slot, 1);
		break;
	case BTRFS_XATTR_ITEM_KEY:
		sanitize_xattr(md, eb, slot);
		break;
	default:
		break;
	}

	memcpy(dst, eb->data, eb->len);
	/* dummy eb is a plain allocation — presumably safe to free() directly */
	free(eb);
}
577
/*
 * Zero inline extents and csum items in the output copy of a leaf.
 *
 * @dst is the already-copied block (from copy_buffer()); @src is the
 * original leaf used to locate items.  Name-bearing items are routed
 * through sanitize_name() when sanitization is enabled.
 */
static void zero_items(struct metadump_struct *md, u8 *dst,
		       struct extent_buffer *src)
{
	struct btrfs_file_extent_item *fi;
	struct btrfs_item *item;
	struct btrfs_key key;
	u32 nritems = btrfs_header_nritems(src);
	size_t size;
	unsigned long ptr;
	int i, extent_type;

	for (i = 0; i < nritems; i++) {
		item = btrfs_item_nr(i);
		btrfs_item_key_to_cpu(src, &key, i);
		/* Checksums of zeroed data would be wrong anyway — drop them */
		if (key.type == BTRFS_CSUM_ITEM_KEY) {
			size = btrfs_item_size_nr(src, i);
			memset(dst + btrfs_leaf_data(src) +
			       btrfs_item_offset_nr(src, i), 0, size);
			continue;
		}

		if (md->sanitize_names && has_name(&key)) {
			sanitize_name(md, dst, src, &key, i);
			continue;
		}

		if (key.type != BTRFS_EXTENT_DATA_KEY)
			continue;

		/* Only inline extents embed file data inside the leaf */
		fi = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(src, fi);
		if (extent_type != BTRFS_FILE_EXTENT_INLINE)
			continue;

		ptr = btrfs_file_extent_inline_start(fi);
		size = btrfs_file_extent_inline_item_len(src, item);
		memset(dst + ptr, 0, size);
	}
}
620
/*
 * Copy a metadata block into @dst, zeroing data the image doesn't need:
 * the unused gap in leaves/nodes, inline file data and csum items.
 * The superblock is copied verbatim.  The block checksum is recomputed
 * at the end since the contents changed.
 */
static void copy_buffer(struct metadump_struct *md, u8 *dst,
			struct extent_buffer *src)
{
	int level;
	size_t size;
	u32 nritems;

	memcpy(dst, src->data, src->len);
	if (src->start == BTRFS_SUPER_INFO_OFFSET)
		return;

	level = btrfs_header_level(src);
	nritems = btrfs_header_nritems(src);

	if (nritems == 0) {
		/* Empty block: keep only the header */
		size = sizeof(struct btrfs_header);
		memset(dst + size, 0, src->len - size);
	} else if (level == 0) {
		/* Leaf: zero the gap between the item array and the data */
		size = btrfs_leaf_data(src) +
			btrfs_item_offset_nr(src, nritems - 1) -
			btrfs_item_nr_offset(nritems);
		memset(dst + btrfs_item_nr_offset(nritems), 0, size);
		zero_items(md, dst, src);
	} else {
		/* Node: zero everything past the last key pointer */
		size = offsetof(struct btrfs_node, ptrs) +
			sizeof(struct btrfs_key_ptr) * nritems;
		memset(dst + size, 0, src->len - size);
	}
	csum_block(dst, src->len);
}
654
655 static void *dump_worker(void *data)
656 {
657         struct metadump_struct *md = (struct metadump_struct *)data;
658         struct async_work *async;
659         int ret;
660
661         while (1) {
662                 pthread_mutex_lock(&md->mutex);
663                 while (list_empty(&md->list)) {
664                         if (md->done) {
665                                 pthread_mutex_unlock(&md->mutex);
666                                 goto out;
667                         }
668                         pthread_cond_wait(&md->cond, &md->mutex);
669                 }
670                 async = list_entry(md->list.next, struct async_work, list);
671                 list_del_init(&async->list);
672                 pthread_mutex_unlock(&md->mutex);
673
674                 if (md->compress_level > 0) {
675                         u8 *orig = async->buffer;
676
677                         async->bufsize = compressBound(async->size);
678                         async->buffer = malloc(async->bufsize);
679                         if (!async->buffer) {
680                                 error("not enough memory for async buffer");
681                                 pthread_mutex_lock(&md->mutex);
682                                 if (!md->error)
683                                         md->error = -ENOMEM;
684                                 pthread_mutex_unlock(&md->mutex);
685                                 pthread_exit(NULL);
686                         }
687
688                         ret = compress2(async->buffer,
689                                          (unsigned long *)&async->bufsize,
690                                          orig, async->size, md->compress_level);
691
692                         if (ret != Z_OK)
693                                 async->error = 1;
694
695                         free(orig);
696                 }
697
698                 pthread_mutex_lock(&md->mutex);
699                 md->num_ready++;
700                 pthread_mutex_unlock(&md->mutex);
701         }
702 out:
703         pthread_exit(NULL);
704 }
705
706 static void meta_cluster_init(struct metadump_struct *md, u64 start)
707 {
708         struct meta_cluster_header *header;
709
710         md->num_items = 0;
711         md->num_ready = 0;
712         header = &md->cluster.header;
713         header->magic = cpu_to_le64(HEADER_MAGIC);
714         header->bytenr = cpu_to_le64(start);
715         header->nritems = cpu_to_le32(0);
716         header->compress = md->compress_level > 0 ?
717                            COMPRESS_ZLIB : COMPRESS_NONE;
718 }
719
/*
 * Tear down a metadump context: wake all workers with ->done set, join
 * @num_threads of them, destroy the synchronization primitives and free
 * the sanitized-name cache.
 */
static void metadump_destroy(struct metadump_struct *md, int num_threads)
{
	int i;
	struct rb_node *n;

	pthread_mutex_lock(&md->mutex);
	md->done = 1;
	pthread_cond_broadcast(&md->cond);
	pthread_mutex_unlock(&md->mutex);

	for (i = 0; i < num_threads; i++)
		pthread_join(md->threads[i], NULL);

	pthread_cond_destroy(&md->cond);
	pthread_mutex_destroy(&md->mutex);

	/* Drain the name tree; each entry owns both name buffers */
	while ((n = rb_first(&md->name_tree))) {
		struct name *name;

		name = rb_entry(n, struct name, n);
		rb_erase(n, &md->name_tree);
		free(name->val);
		free(name->sub);
		free(name);
	}
}
746
747 static int metadump_init(struct metadump_struct *md, struct btrfs_root *root,
748                          FILE *out, int num_threads, int compress_level,
749                          int sanitize_names)
750 {
751         int i, ret = 0;
752
753         memset(md, 0, sizeof(*md));
754         INIT_LIST_HEAD(&md->list);
755         INIT_LIST_HEAD(&md->ordered);
756         md->root = root;
757         md->out = out;
758         md->pending_start = (u64)-1;
759         md->compress_level = compress_level;
760         md->sanitize_names = sanitize_names;
761         if (sanitize_names > 1)
762                 crc32c_optimization_init();
763
764         md->name_tree.rb_node = NULL;
765         md->num_threads = num_threads;
766         pthread_cond_init(&md->cond, NULL);
767         pthread_mutex_init(&md->mutex, NULL);
768         meta_cluster_init(md, 0);
769
770         if (!num_threads)
771                 return 0;
772
773         for (i = 0; i < num_threads; i++) {
774                 ret = pthread_create(md->threads + i, NULL, dump_worker, md);
775                 if (ret)
776                         break;
777         }
778
779         if (ret)
780                 metadump_destroy(md, i + 1);
781
782         return ret;
783 }
784
785 static int write_zero(FILE *out, size_t size)
786 {
787         static char zero[BLOCK_SIZE];
788         return fwrite(zero, size, 1, out);
789 }
790
/*
 * Flush the current cluster to the image: wait for the workers to
 * finish every queued buffer, write the index block, then the buffers
 * in submission order, then zero-pad to the next BLOCK_SIZE boundary.
 *
 * *next receives the stream offset right after this cluster (the
 * bytenr for the next one).  Returns 0 or a negative errno; on error
 * the remaining async_work entries are still freed.
 */
static int write_buffers(struct metadump_struct *md, u64 *next)
{
	struct meta_cluster_header *header = &md->cluster.header;
	struct meta_cluster_item *item;
	struct async_work *async;
	u64 bytenr = 0;
	u32 nritems = 0;
	int ret;
	int err = 0;

	if (list_empty(&md->ordered))
		goto out;

	/* wait until all buffers are compressed */
	while (!err && md->num_items > md->num_ready) {
		struct timespec ts = {
			.tv_sec = 0,
			.tv_nsec = 10000000,
		};
		/* Called with md->mutex held; drop it while we sleep */
		pthread_mutex_unlock(&md->mutex);
		nanosleep(&ts, NULL);
		pthread_mutex_lock(&md->mutex);
		err = md->error;
	}

	if (err) {
		error("one of the threads failed: %s", strerror(-err));
		goto out;
	}

	/* setup and write index block */
	list_for_each_entry(async, &md->ordered, ordered) {
		item = &md->cluster.items[nritems];
		item->bytenr = cpu_to_le64(async->start);
		item->size = cpu_to_le32(async->bufsize);
		nritems++;
	}
	header->nritems = cpu_to_le32(nritems);

	ret = fwrite(&md->cluster, BLOCK_SIZE, 1, md->out);
	if (ret != 1) {
		error("unable to write out cluster: %s", strerror(errno));
		return -errno;
	}

	/* write buffers */
	bytenr += le64_to_cpu(header->bytenr) + BLOCK_SIZE;
	while (!list_empty(&md->ordered)) {
		async = list_entry(md->ordered.next, struct async_work,
				   ordered);
		list_del_init(&async->ordered);

		bytenr += async->bufsize;
		/* After the first failure, keep draining the list without writing */
		if (!err)
			ret = fwrite(async->buffer, async->bufsize, 1,
				     md->out);
		if (ret != 1) {
			error("unable to write out cluster: %s",
				strerror(errno));
			err = -errno;
			ret = 0;
		}

		free(async->buffer);
		free(async);
	}

	/* zero unused space in the last block */
	if (!err && bytenr & BLOCK_MASK) {
		size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);

		bytenr += size;
		ret = write_zero(md->out, size);
		if (ret != 1) {
			error("unable to zero out buffer: %s",
				strerror(errno));
			err = -errno;
		}
	}
out:
	*next = bytenr;
	return err;
}
874
/*
 * Fill async->buffer with the data extent at async->start.
 *
 * Tries every mirror in turn; after a failed read, the next mirror
 * resumes from the first offset that could not be read (offset/logical/
 * bytes_left carry over between mirrors).  Returns 0 when everything
 * was read, -EIO when all mirrors were exhausted with bytes remaining.
 */
static int read_data_extent(struct metadump_struct *md,
			    struct async_work *async)
{
	struct btrfs_root *root = md->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 bytes_left = async->size;
	u64 logical = async->start;
	u64 offset = 0;
	u64 read_len;
	int num_copies;
	int cur_mirror;
	int ret;

	num_copies = btrfs_num_copies(root->fs_info, logical, bytes_left);

	/* Try our best to read data, just like read_tree_block() */
	for (cur_mirror = 0; cur_mirror < num_copies; cur_mirror++) {
		while (bytes_left) {
			read_len = bytes_left;
			/* read_extent_data may shorten read_len to what it actually read */
			ret = read_extent_data(fs_info,
					(char *)(async->buffer + offset),
					logical, &read_len, cur_mirror);
			if (ret < 0)
				break;
			offset += read_len;
			logical += read_len;
			bytes_left -= read_len;
		}
	}
	if (bytes_left)
		return -EIO;
	return 0;
}
908
909 static int get_dev_fd(struct btrfs_root *root)
910 {
911         struct btrfs_device *dev;
912
913         dev = list_first_entry(&root->fs_info->fs_devices->devices,
914                                struct btrfs_device, dev_list);
915         return dev->fd;
916 }
917
918 static int flush_pending(struct metadump_struct *md, int done)
919 {
920         struct async_work *async = NULL;
921         struct extent_buffer *eb;
922         u64 blocksize = md->root->fs_info->nodesize;
923         u64 start = 0;
924         u64 size;
925         size_t offset;
926         int ret = 0;
927
928         if (md->pending_size) {
929                 async = calloc(1, sizeof(*async));
930                 if (!async)
931                         return -ENOMEM;
932
933                 async->start = md->pending_start;
934                 async->size = md->pending_size;
935                 async->bufsize = async->size;
936                 async->buffer = malloc(async->bufsize);
937                 if (!async->buffer) {
938                         free(async);
939                         return -ENOMEM;
940                 }
941                 offset = 0;
942                 start = async->start;
943                 size = async->size;
944
945                 if (md->data) {
946                         ret = read_data_extent(md, async);
947                         if (ret) {
948                                 free(async->buffer);
949                                 free(async);
950                                 return ret;
951                         }
952                 }
953
954                 /*
955                  * Balance can make the mapping not cover the super block, so
956                  * just copy directly from one of the devices.
957                  */
958                 if (start == BTRFS_SUPER_INFO_OFFSET) {
959                         int fd = get_dev_fd(md->root);
960
961                         ret = pread64(fd, async->buffer, size, start);
962                         if (ret < size) {
963                                 free(async->buffer);
964                                 free(async);
965                                 error("unable to read superblock at %llu: %s",
966                                                 (unsigned long long)start,
967                                                 strerror(errno));
968                                 return -errno;
969                         }
970                         size = 0;
971                         ret = 0;
972                 }
973
974                 while (!md->data && size > 0) {
975                         u64 this_read = min(blocksize, size);
976                         eb = read_tree_block(md->root->fs_info, start,
977                                              this_read, 0);
978                         if (!extent_buffer_uptodate(eb)) {
979                                 free(async->buffer);
980                                 free(async);
981                                 error("unable to read metadata block %llu",
982                                         (unsigned long long)start);
983                                 return -EIO;
984                         }
985                         copy_buffer(md, async->buffer + offset, eb);
986                         free_extent_buffer(eb);
987                         start += this_read;
988                         offset += this_read;
989                         size -= this_read;
990                 }
991
992                 md->pending_start = (u64)-1;
993                 md->pending_size = 0;
994         } else if (!done) {
995                 return 0;
996         }
997
998         pthread_mutex_lock(&md->mutex);
999         if (async) {
1000                 list_add_tail(&async->ordered, &md->ordered);
1001                 md->num_items++;
1002                 if (md->compress_level > 0) {
1003                         list_add_tail(&async->list, &md->list);
1004                         pthread_cond_signal(&md->cond);
1005                 } else {
1006                         md->num_ready++;
1007                 }
1008         }
1009         if (md->num_items >= ITEMS_PER_CLUSTER || done) {
1010                 ret = write_buffers(md, &start);
1011                 if (ret)
1012                         error("unable to write buffers: %s", strerror(-ret));
1013                 else
1014                         meta_cluster_init(md, start);
1015         }
1016         pthread_mutex_unlock(&md->mutex);
1017         return ret;
1018 }
1019
1020 static int add_extent(u64 start, u64 size, struct metadump_struct *md,
1021                       int data)
1022 {
1023         int ret;
1024         if (md->data != data ||
1025             md->pending_size + size > MAX_PENDING_SIZE ||
1026             md->pending_start + md->pending_size != start) {
1027                 ret = flush_pending(md, 0);
1028                 if (ret)
1029                         return ret;
1030                 md->pending_start = start;
1031         }
1032         readahead_tree_block(md->root->fs_info, start, size, 0);
1033         md->pending_size += size;
1034         md->data = data;
1035         return 0;
1036 }
1037
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Decide whether the V0 extent at @bytenr is a tree block by scanning
 * the extent item's following EXTENT_REF_V0 items: a ref owned by a
 * non-file objectid (< BTRFS_FIRST_FREE_OBJECTID) marks a tree block.
 *
 * @path is expected to be positioned at the extent item; its slot is
 * advanced past it by this scan.
 *
 * Returns 1 for a tree block, 0 for data, negative on search error.
 */
static int is_tree_block(struct btrfs_root *extent_root,
			 struct btrfs_path *path, u64 bytenr)
{
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 ref_objectid;
	int ret;

	leaf = path->nodes[0];
	while (1) {
		struct btrfs_extent_ref_v0 *ref_item;
		path->slots[0]++;
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				return ret;
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		/* past the items of this extent: no tree ref found */
		if (key.objectid != bytenr)
			break;
		if (key.type != BTRFS_EXTENT_REF_V0_KEY)
			continue;
		ref_item = btrfs_item_ptr(leaf, path->slots[0],
					  struct btrfs_extent_ref_v0);
		ref_objectid = btrfs_ref_objectid_v0(leaf, ref_item);
		/* owned by a tree root rather than an inode: metadata */
		if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID)
			return 1;
		break;
	}
	return 0;
}
#endif
1074
1075 static int copy_tree_blocks(struct btrfs_root *root, struct extent_buffer *eb,
1076                             struct metadump_struct *metadump, int root_tree)
1077 {
1078         struct extent_buffer *tmp;
1079         struct btrfs_root_item *ri;
1080         struct btrfs_key key;
1081         struct btrfs_fs_info *fs_info = root->fs_info;
1082         u64 bytenr;
1083         int level;
1084         int nritems = 0;
1085         int i = 0;
1086         int ret;
1087
1088         ret = add_extent(btrfs_header_bytenr(eb), fs_info->nodesize,
1089                          metadump, 0);
1090         if (ret) {
1091                 error("unable to add metadata block %llu: %d",
1092                                 btrfs_header_bytenr(eb), ret);
1093                 return ret;
1094         }
1095
1096         if (btrfs_header_level(eb) == 0 && !root_tree)
1097                 return 0;
1098
1099         level = btrfs_header_level(eb);
1100         nritems = btrfs_header_nritems(eb);
1101         for (i = 0; i < nritems; i++) {
1102                 if (level == 0) {
1103                         btrfs_item_key_to_cpu(eb, &key, i);
1104                         if (key.type != BTRFS_ROOT_ITEM_KEY)
1105                                 continue;
1106                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
1107                         bytenr = btrfs_disk_root_bytenr(eb, ri);
1108                         tmp = read_tree_block(fs_info, bytenr,
1109                                               fs_info->nodesize, 0);
1110                         if (!extent_buffer_uptodate(tmp)) {
1111                                 error("unable to read log root block");
1112                                 return -EIO;
1113                         }
1114                         ret = copy_tree_blocks(root, tmp, metadump, 0);
1115                         free_extent_buffer(tmp);
1116                         if (ret)
1117                                 return ret;
1118                 } else {
1119                         bytenr = btrfs_node_blockptr(eb, i);
1120                         tmp = read_tree_block(fs_info, bytenr,
1121                                               fs_info->nodesize, 0);
1122                         if (!extent_buffer_uptodate(tmp)) {
1123                                 error("unable to read log root block");
1124                                 return -EIO;
1125                         }
1126                         ret = copy_tree_blocks(root, tmp, metadump, root_tree);
1127                         free_extent_buffer(tmp);
1128                         if (ret)
1129                                 return ret;
1130                 }
1131         }
1132
1133         return 0;
1134 }
1135
1136 static int copy_log_trees(struct btrfs_root *root,
1137                           struct metadump_struct *metadump)
1138 {
1139         u64 blocknr = btrfs_super_log_root(root->fs_info->super_copy);
1140
1141         if (blocknr == 0)
1142                 return 0;
1143
1144         if (!root->fs_info->log_root_tree ||
1145             !root->fs_info->log_root_tree->node) {
1146                 error("unable to copy tree log, it has not been setup");
1147                 return -EIO;
1148         }
1149
1150         return copy_tree_blocks(root, root->fs_info->log_root_tree->node,
1151                                 metadump, 1);
1152 }
1153
1154 static int copy_space_cache(struct btrfs_root *root,
1155                             struct metadump_struct *metadump,
1156                             struct btrfs_path *path)
1157 {
1158         struct extent_buffer *leaf;
1159         struct btrfs_file_extent_item *fi;
1160         struct btrfs_key key;
1161         u64 bytenr, num_bytes;
1162         int ret;
1163
1164         root = root->fs_info->tree_root;
1165
1166         key.objectid = 0;
1167         key.type = BTRFS_EXTENT_DATA_KEY;
1168         key.offset = 0;
1169
1170         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1171         if (ret < 0) {
1172                 error("free space inode not found: %d", ret);
1173                 return ret;
1174         }
1175
1176         leaf = path->nodes[0];
1177
1178         while (1) {
1179                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1180                         ret = btrfs_next_leaf(root, path);
1181                         if (ret < 0) {
1182                                 error("cannot go to next leaf %d", ret);
1183                                 return ret;
1184                         }
1185                         if (ret > 0)
1186                                 break;
1187                         leaf = path->nodes[0];
1188                 }
1189
1190                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1191                 if (key.type != BTRFS_EXTENT_DATA_KEY) {
1192                         path->slots[0]++;
1193                         continue;
1194                 }
1195
1196                 fi = btrfs_item_ptr(leaf, path->slots[0],
1197                                     struct btrfs_file_extent_item);
1198                 if (btrfs_file_extent_type(leaf, fi) !=
1199                     BTRFS_FILE_EXTENT_REG) {
1200                         path->slots[0]++;
1201                         continue;
1202                 }
1203
1204                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1205                 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
1206                 ret = add_extent(bytenr, num_bytes, metadump, 1);
1207                 if (ret) {
1208                         error("unable to add space cache blocks %d", ret);
1209                         btrfs_release_path(path);
1210                         return ret;
1211                 }
1212                 path->slots[0]++;
1213         }
1214
1215         return 0;
1216 }
1217
/*
 * Scan the extent tree starting just past the primary super block and
 * queue every tree-block extent for the metadump.  Handles both
 * EXTENT_ITEMs (flag-checked for TREE_BLOCK) and METADATA_ITEMs, plus
 * the legacy V0 format when compiled with V0 support.
 *
 * Returns 0 on success, negative on search error or corruption.
 */
static int copy_from_extent_tree(struct metadump_struct *metadump,
				 struct btrfs_path *path)
{
	struct btrfs_root *extent_root;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_key key;
	u64 bytenr;
	u64 num_bytes;
	int ret;

	extent_root = metadump->root->fs_info->extent_root;
	/* the super block itself is added separately by the caller */
	bytenr = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0) {
		error("extent root not found: %d", ret);
		return ret;
	}
	ret = 0;

	leaf = path->nodes[0];

	while (1) {
		/* leaf exhausted, advance to the next one */
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0) {
				error("cannot go to next leaf %d", ret);
				break;
			}
			if (ret > 0) {
				ret = 0;
				break;
			}
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		/*
		 * Skip items below our scan position and anything that is
		 * not an extent/metadata item (backrefs etc.).
		 */
		if (key.objectid < bytenr ||
		    (key.type != BTRFS_EXTENT_ITEM_KEY &&
		     key.type != BTRFS_METADATA_ITEM_KEY)) {
			path->slots[0]++;
			continue;
		}

		bytenr = key.objectid;
		if (key.type == BTRFS_METADATA_ITEM_KEY) {
			/* METADATA_ITEM offset is the level, not a length */
			num_bytes = extent_root->fs_info->nodesize;
		} else {
			num_bytes = key.offset;
		}

		if (num_bytes == 0) {
			error("extent length 0 at bytenr %llu key type %d",
					(unsigned long long)bytenr, key.type);
			ret = -EIO;
			break;
		}

		/* a full-size item carries flags telling data from metadata */
		if (btrfs_item_size_nr(leaf, path->slots[0]) > sizeof(*ei)) {
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_extent_item);
			if (btrfs_extent_flags(leaf, ei) &
			    BTRFS_EXTENT_FLAG_TREE_BLOCK) {
				ret = add_extent(bytenr, num_bytes, metadump,
						 0);
				if (ret) {
					error("unable to add block %llu: %d",
						(unsigned long long)bytenr, ret);
					break;
				}
			}
		} else {
			/* V0 item: must inspect the refs to classify it */
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
			ret = is_tree_block(extent_root, path, bytenr);
			if (ret < 0) {
				error("failed to check tree block %llu: %d",
					(unsigned long long)bytenr, ret);
				break;
			}

			if (ret) {
				ret = add_extent(bytenr, num_bytes, metadump,
						 0);
				if (ret) {
					error("unable to add block %llu: %d",
						(unsigned long long)bytenr, ret);
					break;
				}
			}
			ret = 0;
#else
			error(
	"either extent tree is corrupted or you haven't built with V0 support");
			ret = -EIO;
			break;
#endif
		}
		/* continue the scan after this extent */
		bytenr += num_bytes;
	}

	btrfs_release_path(path);

	return ret;
}
1326
1327 static int create_metadump(const char *input, FILE *out, int num_threads,
1328                            int compress_level, int sanitize, int walk_trees)
1329 {
1330         struct btrfs_root *root;
1331         struct btrfs_path path;
1332         struct metadump_struct metadump;
1333         int ret;
1334         int err = 0;
1335
1336         root = open_ctree(input, 0, 0);
1337         if (!root) {
1338                 error("open ctree failed");
1339                 return -EIO;
1340         }
1341
1342         ret = metadump_init(&metadump, root, out, num_threads,
1343                             compress_level, sanitize);
1344         if (ret) {
1345                 error("failed to initialize metadump: %d", ret);
1346                 close_ctree(root);
1347                 return ret;
1348         }
1349
1350         ret = add_extent(BTRFS_SUPER_INFO_OFFSET, BTRFS_SUPER_INFO_SIZE,
1351                         &metadump, 0);
1352         if (ret) {
1353                 error("unable to add metadata: %d", ret);
1354                 err = ret;
1355                 goto out;
1356         }
1357
1358         btrfs_init_path(&path);
1359
1360         if (walk_trees) {
1361                 ret = copy_tree_blocks(root, root->fs_info->chunk_root->node,
1362                                        &metadump, 1);
1363                 if (ret) {
1364                         err = ret;
1365                         goto out;
1366                 }
1367
1368                 ret = copy_tree_blocks(root, root->fs_info->tree_root->node,
1369                                        &metadump, 1);
1370                 if (ret) {
1371                         err = ret;
1372                         goto out;
1373                 }
1374         } else {
1375                 ret = copy_from_extent_tree(&metadump, &path);
1376                 if (ret) {
1377                         err = ret;
1378                         goto out;
1379                 }
1380         }
1381
1382         ret = copy_log_trees(root, &metadump);
1383         if (ret) {
1384                 err = ret;
1385                 goto out;
1386         }
1387
1388         ret = copy_space_cache(root, &metadump, &path);
1389 out:
1390         ret = flush_pending(&metadump, 1);
1391         if (ret) {
1392                 if (!err)
1393                         err = ret;
1394                 error("failed to flush pending data: %d", ret);
1395         }
1396
1397         metadump_destroy(&metadump, num_threads);
1398
1399         btrfs_release_path(&path);
1400         ret = close_ctree(root);
1401         return err ? err : ret;
1402 }
1403
/*
 * Rewrite the restored super block for the old (v1) metadump format:
 * mark it METADUMP and replace the sys_chunk_array with a single
 * catch-all SYSTEM chunk of unlimited length mapping everything 1:1
 * onto the single restore device, then re-checksum the block.
 */
static void update_super_old(u8 *buffer)
{
	struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
	struct btrfs_chunk *chunk;
	struct btrfs_disk_key *key;
	u32 sectorsize = btrfs_super_sectorsize(super);
	u64 flags = btrfs_super_flags(super);

	flags |= BTRFS_SUPER_FLAG_METADUMP;
	btrfs_set_super_flags(super, flags);

	/* the array holds one disk key immediately followed by one chunk */
	key = (struct btrfs_disk_key *)(super->sys_chunk_array);
	chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
				       sizeof(struct btrfs_disk_key));

	btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
	btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
	btrfs_set_disk_key_offset(key, 0);

	/* (u64)-1 length: one chunk covering the whole logical space */
	btrfs_set_stack_chunk_length(chunk, (u64)-1);
	btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
	btrfs_set_stack_chunk_stripe_len(chunk, BTRFS_STRIPE_LEN);
	btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
	btrfs_set_stack_chunk_io_align(chunk, sectorsize);
	btrfs_set_stack_chunk_io_width(chunk, sectorsize);
	btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
	btrfs_set_stack_chunk_num_stripes(chunk, 1);
	btrfs_set_stack_chunk_sub_stripes(chunk, 0);
	/* devid copied raw: both fields are already little-endian on disk */
	chunk->stripe.devid = super->dev_item.devid;
	btrfs_set_stack_stripe_offset(&chunk->stripe, 0);
	memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
	btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
	csum_block(buffer, BTRFS_SUPER_INFO_SIZE);
}
1438
/*
 * Rewrite the restored super block for the v2 metadump format: compact
 * the sys_chunk_array in place, collapsing every chunk to a single
 * stripe (two for DUP) on the single restore device, translating the
 * chunk's logical offset to the physical offset recorded in the image.
 * Finally marks the super METADUMP_V2, fixes num_devices to 1 and
 * re-checksums the block.
 *
 * Returns 0 on success, -EIO on an unexpected key in the sys array.
 */
static int update_super(struct mdrestore_struct *mdres, u8 *buffer)
{
	struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
	struct btrfs_chunk *chunk;
	struct btrfs_disk_key *disk_key;
	struct btrfs_key key;
	u64 flags = btrfs_super_flags(super);
	u32 new_array_size = 0;
	u32 array_size;
	u32 cur = 0;
	u8 *ptr, *write_ptr;
	int old_num_stripes;

	/* read via ptr, write compacted items back via write_ptr */
	write_ptr = ptr = super->sys_chunk_array;
	array_size = btrfs_super_sys_array_size(super);

	while (cur < array_size) {
		disk_key = (struct btrfs_disk_key *)ptr;
		btrfs_disk_key_to_cpu(&key, disk_key);

		new_array_size += sizeof(*disk_key);
		memmove(write_ptr, ptr, sizeof(*disk_key));

		write_ptr += sizeof(*disk_key);
		ptr += sizeof(*disk_key);
		cur += sizeof(*disk_key);

		if (key.type == BTRFS_CHUNK_ITEM_KEY) {
			u64 type, physical, physical_dup, size = 0;

			/* stripe count must be read before the move overwrites it */
			chunk = (struct btrfs_chunk *)ptr;
			old_num_stripes = btrfs_stack_chunk_num_stripes(chunk);
			chunk = (struct btrfs_chunk *)write_ptr;

			/*
			 * sizeof(*chunk) covers the header plus the first
			 * stripe.
			 * NOTE(review): for DUP only write_ptr is advanced
			 * past the second stripe; its contents are not
			 * memmove'd, which is only safe while write_ptr ==
			 * ptr — verify against a dump whose array actually
			 * shrank before a DUP chunk.
			 */
			memmove(write_ptr, ptr, sizeof(*chunk));
			btrfs_set_stack_chunk_sub_stripes(chunk, 0);
			type = btrfs_stack_chunk_type(chunk);
			if (type & BTRFS_BLOCK_GROUP_DUP) {
				new_array_size += sizeof(struct btrfs_stripe);
				write_ptr += sizeof(struct btrfs_stripe);
			} else {
				/* any other profile collapses to 1 stripe */
				btrfs_set_stack_chunk_num_stripes(chunk, 1);
				btrfs_set_stack_chunk_type(chunk,
						BTRFS_BLOCK_GROUP_SYSTEM);
			}
			chunk->stripe.devid = super->dev_item.devid;
			physical = logical_to_physical(mdres, key.offset,
						       &size, &physical_dup);
			/* size == -1 means the mapping was not found */
			if (size != (u64)-1)
				btrfs_set_stack_stripe_offset(&chunk->stripe,
							      physical);
			memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid,
			       BTRFS_UUID_SIZE);
			new_array_size += sizeof(*chunk);
		} else {
			error("bogus key in the sys array %d", key.type);
			return -EIO;
		}
		/* advance past the old (possibly multi-stripe) chunk item */
		write_ptr += sizeof(*chunk);
		ptr += btrfs_chunk_item_size(old_num_stripes);
		cur += btrfs_chunk_item_size(old_num_stripes);
	}

	if (mdres->clear_space_cache)
		btrfs_set_super_cache_generation(super, 0);

	flags |= BTRFS_SUPER_FLAG_METADUMP_V2;
	btrfs_set_super_flags(super, flags);
	btrfs_set_super_sys_array_size(super, new_array_size);
	btrfs_set_super_num_devices(super, 1);
	csum_block(buffer, BTRFS_SUPER_INFO_SIZE);

	return 0;
}
1513
1514 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size)
1515 {
1516         struct extent_buffer *eb;
1517
1518         eb = calloc(1, sizeof(struct extent_buffer) + size);
1519         if (!eb)
1520                 return NULL;
1521
1522         eb->start = bytenr;
1523         eb->len = size;
1524         return eb;
1525 }
1526
/*
 * Shrink the item at @slot of leaf @eb to @new_size bytes, sliding the
 * data of all following items toward the end of the leaf and fixing up
 * their offsets.  In-memory analogue of btrfs_truncate_item().
 *
 * NOTE(review): assumes new_size <= current size; a larger new_size
 * would underflow size_diff (u32) — confirm callers never grow items.
 */
static void truncate_item(struct extent_buffer *eb, int slot, u32 new_size)
{
	struct btrfs_item *item;
	u32 nritems;
	u32 old_size;
	u32 old_data_start;
	u32 size_diff;
	u32 data_end;
	int i;

	old_size = btrfs_item_size_nr(eb, slot);
	if (old_size == new_size)
		return;

	nritems = btrfs_header_nritems(eb);
	/* item data grows downward: the last item has the lowest offset */
	data_end = btrfs_item_offset_nr(eb, nritems - 1);

	old_data_start = btrfs_item_offset_nr(eb, slot);
	size_diff = old_size - new_size;

	/* shift the offsets of this item and everything after it */
	for (i = slot; i < nritems; i++) {
		u32 ioff;
		item = btrfs_item_nr(i);
		ioff = btrfs_item_offset(eb, item);
		btrfs_set_item_offset(eb, item, ioff + size_diff);
	}

	/* move the data of the following items up by size_diff */
	memmove_extent_buffer(eb, btrfs_leaf_data(eb) + data_end + size_diff,
			      btrfs_leaf_data(eb) + data_end,
			      old_data_start + new_size - data_end);
	item = btrfs_item_nr(slot);
	btrfs_set_item_size(eb, item, new_size);
}
1560
/*
 * Scan the restored buffer (one or more tree nodes) for chunk tree
 * leaves and rewrite every CHUNK_ITEM in them for the single restore
 * device: strip the RAID profile, collapse to one stripe (two when the
 * chunk had a DUP duplicate), translate logical to physical offsets and
 * stamp in the restore device's id/uuid, then re-checksum the node.
 *
 * Returns 0 on success (including when nothing needed fixing) or
 * -ENOMEM.
 */
static int fixup_chunk_tree_block(struct mdrestore_struct *mdres,
				  struct async_work *async, u8 *buffer,
				  size_t size)
{
	struct extent_buffer *eb;
	size_t size_left = size;
	u64 bytenr = async->start;
	int i;

	/*
	 * NOTE(review): a buffer that is not a whole multiple of the node
	 * size is silently skipped, not treated as an error — confirm
	 * that is the intended behavior for truncated images.
	 */
	if (size_left % mdres->nodesize)
		return 0;

	eb = alloc_dummy_eb(bytenr, mdres->nodesize);
	if (!eb)
		return -ENOMEM;

	/* work through the buffer one node at a time */
	while (size_left) {
		eb->start = bytenr;
		memcpy(eb->data, buffer, mdres->nodesize);

		/* stop entirely if this doesn't look like our metadata */
		if (btrfs_header_bytenr(eb) != bytenr)
			break;
		if (memcmp(mdres->fsid,
			   eb->data + offsetof(struct btrfs_header, fsid),
			   BTRFS_FSID_SIZE))
			break;

		/* only chunk tree leaves contain CHUNK_ITEMs to fix */
		if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID)
			goto next;

		if (btrfs_header_level(eb) != 0)
			goto next;

		for (i = 0; i < btrfs_header_nritems(eb); i++) {
			struct btrfs_chunk *chunk;
			struct btrfs_key key;
			u64 type, physical, physical_dup, size = (u64)-1;

			btrfs_item_key_to_cpu(eb, &key, i);
			if (key.type != BTRFS_CHUNK_ITEM_KEY)
				continue;

			size = 0;
			physical = logical_to_physical(mdres, key.offset,
						       &size, &physical_dup);

			/* single-stripe chunk: drop the extra stripes */
			if (!physical_dup)
				truncate_item(eb, i, sizeof(*chunk));
			chunk = btrfs_item_ptr(eb, i, struct btrfs_chunk);


			/* Zero out the RAID profile */
			type = btrfs_chunk_type(eb, chunk);
			type &= (BTRFS_BLOCK_GROUP_DATA |
				 BTRFS_BLOCK_GROUP_SYSTEM |
				 BTRFS_BLOCK_GROUP_METADATA |
				 BTRFS_BLOCK_GROUP_DUP);
			btrfs_set_chunk_type(eb, chunk, type);

			if (!physical_dup)
				btrfs_set_chunk_num_stripes(eb, chunk, 1);
			btrfs_set_chunk_sub_stripes(eb, chunk, 0);
			btrfs_set_stripe_devid_nr(eb, chunk, 0, mdres->devid);
			/* size == -1 means the mapping was not found */
			if (size != (u64)-1)
				btrfs_set_stripe_offset_nr(eb, chunk, 0,
							   physical);
			/* update stripe 2 offset */
			if (physical_dup)
				btrfs_set_stripe_offset_nr(eb, chunk, 1,
							   physical_dup);

			write_extent_buffer(eb, mdres->uuid,
					(unsigned long)btrfs_stripe_dev_uuid_nr(
						chunk, 0),
					BTRFS_UUID_SIZE);
		}
		/* write the fixed node back and refresh its checksum */
		memcpy(buffer, eb->data, eb->len);
		csum_block(buffer, eb->len);
next:
		size_left -= mdres->nodesize;
		buffer += mdres->nodesize;
		bytenr += mdres->nodesize;
	}

	free(eb);
	return 0;
}
1648
1649 static void write_backup_supers(int fd, u8 *buf)
1650 {
1651         struct btrfs_super_block *super = (struct btrfs_super_block *)buf;
1652         struct stat st;
1653         u64 size;
1654         u64 bytenr;
1655         int i;
1656         int ret;
1657
1658         if (fstat(fd, &st)) {
1659                 error(
1660         "cannot stat restore point, won't be able to write backup supers: %s",
1661                         strerror(errno));
1662                 return;
1663         }
1664
1665         size = btrfs_device_size(fd, &st);
1666
1667         for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1668                 bytenr = btrfs_sb_offset(i);
1669                 if (bytenr + BTRFS_SUPER_INFO_SIZE > size)
1670                         break;
1671                 btrfs_set_super_bytenr(super, bytenr);
1672                 csum_block(buf, BTRFS_SUPER_INFO_SIZE);
1673                 ret = pwrite64(fd, buf, BTRFS_SUPER_INFO_SIZE, bytenr);
1674                 if (ret < BTRFS_SUPER_INFO_SIZE) {
1675                         if (ret < 0)
1676                                 error(
1677                                 "problem writing out backup super block %d: %s",
1678                                                 i, strerror(errno));
1679                         else
1680                                 error("short write writing out backup super block");
1681                         break;
1682                 }
1683         }
1684 }
1685
/*
 * Restore worker thread.  Pulls queued async_work items off mdres->list,
 * decompresses them if the image is zlib-compressed, applies super block /
 * chunk tree fixups, and writes the payload to the output device (handling
 * DUP chunks by writing a second copy).
 *
 * The first error encountered is latched into mdres->error (as a negative
 * errno-style code); the thread exits once mdres->done is set and the queue
 * is drained.
 */
static void *restore_worker(void *data)
{
	struct mdrestore_struct *mdres = (struct mdrestore_struct *)data;
	struct async_work *async;
	size_t size;
	u8 *buffer;
	u8 *outbuf;
	int outfd;
	int ret;
	int compress_size = MAX_PENDING_SIZE * 4;

	outfd = fileno(mdres->out);
	buffer = malloc(compress_size);
	if (!buffer) {
		error("not enough memory for restore worker buffer");
		pthread_mutex_lock(&mdres->mutex);
		if (!mdres->error)
			mdres->error = -ENOMEM;
		pthread_mutex_unlock(&mdres->mutex);
		pthread_exit(NULL);
	}

	while (1) {
		u64 bytenr, physical_dup;
		off_t offset = 0;
		int err = 0;

		pthread_mutex_lock(&mdres->mutex);
		/*
		 * Wait until the super block has been seen (nodesize set by
		 * fill_mdres_info) and there is work queued.
		 */
		while (!mdres->nodesize || list_empty(&mdres->list)) {
			if (mdres->done) {
				pthread_mutex_unlock(&mdres->mutex);
				goto out;
			}
			pthread_cond_wait(&mdres->cond, &mdres->mutex);
		}
		async = list_entry(mdres->list.next, struct async_work, list);
		list_del_init(&async->list);

		if (mdres->compress_method == COMPRESS_ZLIB) {
			size = compress_size;
			/* Drop the lock for the expensive decompression. */
			pthread_mutex_unlock(&mdres->mutex);
			ret = uncompress(buffer, (unsigned long *)&size,
					 async->buffer, async->bufsize);
			pthread_mutex_lock(&mdres->mutex);
			if (ret != Z_OK) {
				error("decompression failed with %d", ret);
				err = -EIO;
			}
			outbuf = buffer;
		} else {
			outbuf = async->buffer;
			size = async->bufsize;
		}

		if (!mdres->multi_devices) {
			if (async->start == BTRFS_SUPER_INFO_OFFSET) {
				if (mdres->old_restore) {
					update_super_old(outbuf);
				} else {
					ret = update_super(mdres, outbuf);
					if (ret)
						err = ret;
				}
			} else if (!mdres->old_restore) {
				/* Remap stripe offsets inside chunk items */
				ret = fixup_chunk_tree_block(mdres, async, outbuf, size);
				if (ret)
					err = ret;
			}
		}

		if (!mdres->fixup_offset) {
			while (size) {
				u64 chunk_size = size;
				physical_dup = 0;
				if (!mdres->multi_devices && !mdres->old_restore)
					bytenr = logical_to_physical(mdres,
						     async->start + offset,
						     &chunk_size,
						     &physical_dup);
				else
					bytenr = async->start + offset;

				ret = pwrite64(outfd, outbuf+offset, chunk_size,
					       bytenr);
				if (ret != chunk_size)
					goto error;

				/* DUP chunks get a second copy on-disk */
				if (physical_dup) {
					ret = pwrite64(outfd, outbuf+offset,
						       chunk_size,
						       physical_dup);
					if (ret != chunk_size)
						goto error;
				}

				size -= chunk_size;
				offset += chunk_size;
				continue;

error:
				if (ret < 0) {
					/*
					 * Store a negative code, consistent
					 * with -EIO/-ENOMEM used elsewhere.
					 */
					err = -errno;
					error("unable to write to device: %s",
							strerror(-err));
				} else {
					error("short write");
					err = -EIO;
				}
				/*
				 * Abort this item instead of retrying the
				 * same failing write forever.
				 */
				break;
			}
		} else if (async->start != BTRFS_SUPER_INFO_OFFSET) {
			ret = write_data_to_disk(mdres->info, outbuf, async->start, size, 0);
			if (ret) {
				error("failed to write data");
				exit(1);
			}
		}

		/* backup super blocks are already there at fixup_offset stage */
		if (!mdres->multi_devices && async->start == BTRFS_SUPER_INFO_OFFSET)
			write_backup_supers(outfd, outbuf);

		if (err && !mdres->error)
			mdres->error = err;
		mdres->num_items--;
		pthread_mutex_unlock(&mdres->mutex);

		free(async->buffer);
		free(async);
	}
out:
	free(buffer);
	pthread_exit(NULL);
}
1819
1820 static void mdrestore_destroy(struct mdrestore_struct *mdres, int num_threads)
1821 {
1822         struct rb_node *n;
1823         int i;
1824
1825         while ((n = rb_first(&mdres->chunk_tree))) {
1826                 struct fs_chunk *entry;
1827
1828                 entry = rb_entry(n, struct fs_chunk, l);
1829                 rb_erase(n, &mdres->chunk_tree);
1830                 rb_erase(&entry->p, &mdres->physical_tree);
1831                 free(entry);
1832         }
1833         pthread_mutex_lock(&mdres->mutex);
1834         mdres->done = 1;
1835         pthread_cond_broadcast(&mdres->cond);
1836         pthread_mutex_unlock(&mdres->mutex);
1837
1838         for (i = 0; i < num_threads; i++)
1839                 pthread_join(mdres->threads[i], NULL);
1840
1841         pthread_cond_destroy(&mdres->cond);
1842         pthread_mutex_destroy(&mdres->mutex);
1843 }
1844
1845 static int mdrestore_init(struct mdrestore_struct *mdres,
1846                           FILE *in, FILE *out, int old_restore,
1847                           int num_threads, int fixup_offset,
1848                           struct btrfs_fs_info *info, int multi_devices)
1849 {
1850         int i, ret = 0;
1851
1852         memset(mdres, 0, sizeof(*mdres));
1853         pthread_cond_init(&mdres->cond, NULL);
1854         pthread_mutex_init(&mdres->mutex, NULL);
1855         INIT_LIST_HEAD(&mdres->list);
1856         INIT_LIST_HEAD(&mdres->overlapping_chunks);
1857         mdres->in = in;
1858         mdres->out = out;
1859         mdres->old_restore = old_restore;
1860         mdres->chunk_tree.rb_node = NULL;
1861         mdres->fixup_offset = fixup_offset;
1862         mdres->info = info;
1863         mdres->multi_devices = multi_devices;
1864         mdres->clear_space_cache = 0;
1865         mdres->last_physical_offset = 0;
1866         mdres->alloced_chunks = 0;
1867
1868         if (!num_threads)
1869                 return 0;
1870
1871         mdres->num_threads = num_threads;
1872         for (i = 0; i < num_threads; i++) {
1873                 ret = pthread_create(&mdres->threads[i], NULL, restore_worker,
1874                                      mdres);
1875                 if (ret) {
1876                         /* pthread_create returns errno directly */
1877                         ret = -ret;
1878                         break;
1879                 }
1880         }
1881         if (ret)
1882                 mdrestore_destroy(mdres, i + 1);
1883         return ret;
1884 }
1885
/*
 * Initialize per-restore state (nodesize, fsid, device uuid/devid) from the
 * super block carried in @async.  No-op if already initialized.
 *
 * Returns 0 on success, -ENOMEM or -EIO on failure.
 */
static int fill_mdres_info(struct mdrestore_struct *mdres,
			   struct async_work *async)
{
	struct btrfs_super_block *super;
	u8 *buffer = NULL;
	u8 *outbuf;
	int ret;

	/* We've already been initialized */
	if (mdres->nodesize)
		return 0;

	if (mdres->compress_method == COMPRESS_ZLIB) {
		/*
		 * uncompress() takes an unsigned long (uLongf), so use that
		 * type directly instead of casting a size_t pointer.
		 */
		unsigned long size = MAX_PENDING_SIZE * 2;

		buffer = malloc(MAX_PENDING_SIZE * 2);
		if (!buffer)
			return -ENOMEM;
		ret = uncompress(buffer, &size,
				 async->buffer, async->bufsize);
		if (ret != Z_OK) {
			error("decompression failed with %d", ret);
			free(buffer);
			return -EIO;
		}
		outbuf = buffer;
	} else {
		outbuf = async->buffer;
	}

	super = (struct btrfs_super_block *)outbuf;
	mdres->nodesize = btrfs_super_nodesize(super);
	memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
	memcpy(mdres->uuid, super->dev_item.uuid,
		       BTRFS_UUID_SIZE);
	mdres->devid = le64_to_cpu(super->dev_item.devid);
	free(buffer);
	return 0;
}
1925
/*
 * Read all items of one cluster from the image stream and queue them for
 * the restore workers.  On success *next is set to the byte offset of the
 * next cluster (block-aligned).
 *
 * Returns 0 on success, -ENOMEM or -EIO on failure.
 */
static int add_cluster(struct meta_cluster *cluster,
		       struct mdrestore_struct *mdres, u64 *next)
{
	struct meta_cluster_item *item;
	struct meta_cluster_header *header = &cluster->header;
	struct async_work *async;
	u64 bytenr;
	u32 i, nritems;
	int ret;

	/* Workers read compress_method concurrently, so update under lock. */
	pthread_mutex_lock(&mdres->mutex);
	mdres->compress_method = header->compress;
	pthread_mutex_unlock(&mdres->mutex);

	/* Item payloads start right after the BLOCK_SIZE cluster header. */
	bytenr = le64_to_cpu(header->bytenr) + BLOCK_SIZE;
	nritems = le32_to_cpu(header->nritems);
	for (i = 0; i < nritems; i++) {
		item = &cluster->items[i];
		async = calloc(1, sizeof(*async));
		if (!async) {
			error("not enough memory for async data");
			return -ENOMEM;
		}
		async->start = le64_to_cpu(item->bytenr);
		async->bufsize = le32_to_cpu(item->size);
		async->buffer = malloc(async->bufsize);
		if (!async->buffer) {
			error("not enough memory for async buffer");
			free(async);
			return -ENOMEM;
		}
		ret = fread(async->buffer, async->bufsize, 1, mdres->in);
		if (ret != 1) {
			error("unable to read buffer: %s", strerror(errno));
			free(async->buffer);
			free(async);
			return -EIO;
		}
		bytenr += async->bufsize;

		pthread_mutex_lock(&mdres->mutex);
		if (async->start == BTRFS_SUPER_INFO_OFFSET) {
			/*
			 * The super block item seeds nodesize/fsid etc.;
			 * workers block until that has happened.
			 */
			ret = fill_mdres_info(mdres, async);
			if (ret) {
				error("unable to set up restore state");
				pthread_mutex_unlock(&mdres->mutex);
				free(async->buffer);
				free(async);
				return ret;
			}
		}
		list_add_tail(&async->list, &mdres->list);
		mdres->num_items++;
		pthread_cond_signal(&mdres->cond);
		pthread_mutex_unlock(&mdres->mutex);
	}
	/* Skip padding up to the next BLOCK_SIZE boundary. */
	if (bytenr & BLOCK_MASK) {
		char buffer[BLOCK_MASK];
		size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);

		bytenr += size;
		ret = fread(buffer, size, 1, mdres->in);
		if (ret != 1) {
			error("failed to read buffer: %s", strerror(errno));
			return -EIO;
		}
	}
	*next = bytenr;
	return 0;
}
1996
1997 static int wait_for_worker(struct mdrestore_struct *mdres)
1998 {
1999         int ret = 0;
2000
2001         pthread_mutex_lock(&mdres->mutex);
2002         ret = mdres->error;
2003         while (!ret && mdres->num_items > 0) {
2004                 struct timespec ts = {
2005                         .tv_sec = 0,
2006                         .tv_nsec = 10000000,
2007                 };
2008                 pthread_mutex_unlock(&mdres->mutex);
2009                 nanosleep(&ts, NULL);
2010                 pthread_mutex_lock(&mdres->mutex);
2011                 ret = mdres->error;
2012         }
2013         pthread_mutex_unlock(&mdres->mutex);
2014         return ret;
2015 }
2016
/*
 * Parse one chunk tree block found in a decoded cluster item and record its
 * chunk mappings in mdres.  @buffer covers the item starting at
 * @item_bytenr; the wanted block lives at @bytenr inside it.  Interior
 * nodes recurse via search_for_chunk_blocks() (starting the scan at
 * @cluster_bytenr); leaves have their chunk items inserted into the
 * logical and physical trees.
 *
 * Returns 0 on success, -ENOMEM/-EIO on failure.
 */
static int read_chunk_block(struct mdrestore_struct *mdres, u8 *buffer,
			    u64 bytenr, u64 item_bytenr, u32 bufsize,
			    u64 cluster_bytenr)
{
	struct extent_buffer *eb;
	int ret = 0;
	int i;

	eb = alloc_dummy_eb(bytenr, mdres->nodesize);
	if (!eb) {
		ret = -ENOMEM;
		goto out;
	}

	/* Advance to the wanted block inside the item's buffer. */
	while (item_bytenr != bytenr) {
		buffer += mdres->nodesize;
		item_bytenr += mdres->nodesize;
	}

	memcpy(eb->data, buffer, mdres->nodesize);
	/* Sanity-check that we really landed on the requested block. */
	if (btrfs_header_bytenr(eb) != bytenr) {
		error("eb bytenr does not match found bytenr: %llu != %llu",
				(unsigned long long)btrfs_header_bytenr(eb),
				(unsigned long long)bytenr);
		ret = -EIO;
		goto out;
	}

	if (memcmp(mdres->fsid, eb->data + offsetof(struct btrfs_header, fsid),
		   BTRFS_FSID_SIZE)) {
		error("filesystem UUID of eb %llu does not match",
				(unsigned long long)bytenr);
		ret = -EIO;
		goto out;
	}

	if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID) {
		error("wrong eb %llu owner %llu",
				(unsigned long long)bytenr,
				(unsigned long long)btrfs_header_owner(eb));
		ret = -EIO;
		goto out;
	}

	for (i = 0; i < btrfs_header_nritems(eb); i++) {
		struct btrfs_chunk *chunk;
		struct fs_chunk *fs_chunk;
		struct btrfs_key key;
		u64 type;

		/* Interior node: chase each child block through the image. */
		if (btrfs_header_level(eb)) {
			u64 blockptr = btrfs_node_blockptr(eb, i);

			ret = search_for_chunk_blocks(mdres, blockptr,
						      cluster_bytenr);
			if (ret)
				break;
			continue;
		}

		/* Yay a leaf!  We loves leafs! */
		btrfs_item_key_to_cpu(eb, &key, i);
		if (key.type != BTRFS_CHUNK_ITEM_KEY)
			continue;

		fs_chunk = malloc(sizeof(struct fs_chunk));
		if (!fs_chunk) {
			error("not enough memory to allocate chunk");
			ret = -ENOMEM;
			break;
		}
		memset(fs_chunk, 0, sizeof(*fs_chunk));
		chunk = btrfs_item_ptr(eb, i, struct btrfs_chunk);

		fs_chunk->logical = key.offset;
		fs_chunk->physical = btrfs_stripe_offset_nr(eb, chunk, 0);
		fs_chunk->bytes = btrfs_chunk_length(eb, chunk);
		INIT_LIST_HEAD(&fs_chunk->list);
		/*
		 * Chunks whose physical range collides with an existing one
		 * are queued for remapping instead of inserted directly.
		 */
		if (tree_search(&mdres->physical_tree, &fs_chunk->p,
				physical_cmp, 1) != NULL)
			list_add(&fs_chunk->list, &mdres->overlapping_chunks);
		else
			tree_insert(&mdres->physical_tree, &fs_chunk->p,
				    physical_cmp);

		type = btrfs_chunk_type(eb, chunk);
		if (type & BTRFS_BLOCK_GROUP_DUP) {
			fs_chunk->physical_dup =
					btrfs_stripe_offset_nr(eb, chunk, 1);
		}

		/* Track the highest physical offset used by any stripe. */
		if (fs_chunk->physical_dup + fs_chunk->bytes >
		    mdres->last_physical_offset)
			mdres->last_physical_offset = fs_chunk->physical_dup +
				fs_chunk->bytes;
		else if (fs_chunk->physical + fs_chunk->bytes >
		    mdres->last_physical_offset)
			mdres->last_physical_offset = fs_chunk->physical +
				fs_chunk->bytes;
		mdres->alloced_chunks += fs_chunk->bytes;
		/* in dup case, fs_chunk->bytes should add twice */
		if (fs_chunk->physical_dup)
			mdres->alloced_chunks += fs_chunk->bytes;
		tree_insert(&mdres->chunk_tree, &fs_chunk->l, chunk_cmp);
	}
out:
	free(eb);
	return ret;
}
2126
/* If you have to ask you aren't worthy */
/*
 * Scan the image for the cluster item containing the chunk tree block at
 * logical address @search, starting the scan at @cluster_bytenr (falling
 * back to the start of the image once), and feed the block to
 * read_chunk_block().
 *
 * Returns 0 on success, -ENOMEM/-EIO on failure.
 */
static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
				   u64 search, u64 cluster_bytenr)
{
	struct meta_cluster *cluster;
	struct meta_cluster_header *header;
	struct meta_cluster_item *item;
	u64 current_cluster = cluster_bytenr, bytenr;
	u64 item_bytenr;
	u32 bufsize, nritems, i;
	u32 max_size = MAX_PENDING_SIZE * 2;
	u8 *buffer, *tmp = NULL;
	int ret = 0;

	cluster = malloc(BLOCK_SIZE);
	if (!cluster) {
		error("not enough memory for cluster");
		return -ENOMEM;
	}

	buffer = malloc(max_size);
	if (!buffer) {
		error("not enough memory for buffer");
		free(cluster);
		return -ENOMEM;
	}

	/* Compressed images need a scratch buffer for the raw item data. */
	if (mdres->compress_method == COMPRESS_ZLIB) {
		tmp = malloc(max_size);
		if (!tmp) {
			error("not enough memory for buffer");
			free(cluster);
			free(buffer);
			return -ENOMEM;
		}
	}

	bytenr = current_cluster;
	while (1) {
		if (fseek(mdres->in, current_cluster, SEEK_SET)) {
			error("seek failed: %s", strerror(errno));
			ret = -EIO;
			break;
		}

		ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
		if (ret == 0) {
			/* Hit EOF; rewind to the start of the image once. */
			if (cluster_bytenr != 0) {
				cluster_bytenr = 0;
				current_cluster = 0;
				bytenr = 0;
				continue;
			}
			error(
	"unknown state after reading cluster at %llu, probably corrupted data",
					(unsigned long long)cluster_bytenr);
			ret = -EIO;
			break;
		} else if (ret < 0) {
			error("unable to read image at %llu: %s",
					(unsigned long long)cluster_bytenr,
					strerror(errno));
			break;
		}
		ret = 0;

		header = &cluster->header;
		if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
		    le64_to_cpu(header->bytenr) != current_cluster) {
			error("bad header in metadump image");
			ret = -EIO;
			break;
		}

		bytenr += BLOCK_SIZE;
		nritems = le32_to_cpu(header->nritems);
		for (i = 0; i < nritems; i++) {
			size_t size;

			item = &cluster->items[i];
			bufsize = le32_to_cpu(item->size);
			item_bytenr = le64_to_cpu(item->bytenr);

			if (bufsize > max_size) {
				error("item %u too big: %u > %u", i, bufsize,
						max_size);
				ret = -EIO;
				break;
			}

			if (mdres->compress_method == COMPRESS_ZLIB) {
				ret = fread(tmp, bufsize, 1, mdres->in);
				if (ret != 1) {
					error("read error: %s", strerror(errno));
					ret = -EIO;
					break;
				}

				size = max_size;
				ret = uncompress(buffer,
						 (unsigned long *)&size, tmp,
						 bufsize);
				if (ret != Z_OK) {
					error("decompression failed with %d",
							ret);
					ret = -EIO;
					break;
				}
			} else {
				ret = fread(buffer, bufsize, 1, mdres->in);
				if (ret != 1) {
					error("read error: %s",
							strerror(errno));
					ret = -EIO;
					break;
				}
				size = bufsize;
			}
			ret = 0;

			/* Does this item cover the block we're looking for? */
			if (item_bytenr <= search &&
			    item_bytenr + size > search) {
				ret = read_chunk_block(mdres, buffer, search,
						       item_bytenr, size,
						       current_cluster);
				if (!ret)
					ret = 1;
				break;
			}
			bytenr += bufsize;
		}
		if (ret) {
			/* ret == 1 means the block was found and handled. */
			if (ret > 0)
				ret = 0;
			break;
		}
		/* Clusters are padded out to BLOCK_SIZE boundaries. */
		if (bytenr & BLOCK_MASK)
			bytenr += BLOCK_SIZE - (bytenr & BLOCK_MASK);
		current_cluster = bytenr;
	}

	free(tmp);
	free(buffer);
	free(cluster);
	return ret;
}
2273
/*
 * Read the first cluster of the image, locate the super block item in it,
 * extract nodesize/fsid/uuid/devid and then walk the chunk tree from the
 * chunk root so all chunk mappings are known before restoring.
 *
 * Returns 0 on success or a negative errno on failure.  Skipped entirely
 * when reading from stdin since we cannot seek back.
 */
static int build_chunk_tree(struct mdrestore_struct *mdres,
			    struct meta_cluster *cluster)
{
	struct btrfs_super_block *super;
	struct meta_cluster_header *header;
	struct meta_cluster_item *item = NULL;
	u64 chunk_root_bytenr = 0;
	u32 i, nritems;
	u64 bytenr = 0;
	u8 *buffer;
	int ret;

	/* We can't seek with stdin so don't bother doing this */
	if (mdres->in == stdin)
		return 0;

	ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
	if (ret <= 0) {
		error("unable to read cluster: %s", strerror(errno));
		return -EIO;
	}
	ret = 0;

	header = &cluster->header;
	if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
	    le64_to_cpu(header->bytenr) != 0) {
		error("bad header in metadump image");
		return -EIO;
	}

	bytenr += BLOCK_SIZE;
	mdres->compress_method = header->compress;
	nritems = le32_to_cpu(header->nritems);
	/* Skip over items until we reach the super block. */
	for (i = 0; i < nritems; i++) {
		item = &cluster->items[i];

		if (le64_to_cpu(item->bytenr) == BTRFS_SUPER_INFO_OFFSET)
			break;
		bytenr += le32_to_cpu(item->size);
		if (fseek(mdres->in, le32_to_cpu(item->size), SEEK_CUR)) {
			error("seek failed: %s", strerror(errno));
			return -EIO;
		}
	}

	/* item is NULL when the cluster had no items at all. */
	if (!item || le64_to_cpu(item->bytenr) != BTRFS_SUPER_INFO_OFFSET) {
		if (item)
			error("did not find superblock at %llu",
					le64_to_cpu(item->bytenr));
		else
			error("did not find superblock");
		return -EINVAL;
	}

	buffer = malloc(le32_to_cpu(item->size));
	if (!buffer) {
		error("not enough memory to allocate buffer");
		return -ENOMEM;
	}

	ret = fread(buffer, le32_to_cpu(item->size), 1, mdres->in);
	if (ret != 1) {
		error("unable to read buffer: %s", strerror(errno));
		free(buffer);
		return -EIO;
	}

	if (mdres->compress_method == COMPRESS_ZLIB) {
		size_t size = MAX_PENDING_SIZE * 2;
		u8 *tmp;

		tmp = malloc(MAX_PENDING_SIZE * 2);
		if (!tmp) {
			free(buffer);
			return -ENOMEM;
		}
		ret = uncompress(tmp, (unsigned long *)&size,
				 buffer, le32_to_cpu(item->size));
		if (ret != Z_OK) {
			error("decompression failed with %d", ret);
			free(buffer);
			free(tmp);
			return -EIO;
		}
		free(buffer);
		buffer = tmp;
	}

	pthread_mutex_lock(&mdres->mutex);
	super = (struct btrfs_super_block *)buffer;
	chunk_root_bytenr = btrfs_super_chunk_root(super);
	mdres->nodesize = btrfs_super_nodesize(super);
	memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
	memcpy(mdres->uuid, super->dev_item.uuid,
		       BTRFS_UUID_SIZE);
	mdres->devid = le64_to_cpu(super->dev_item.devid);
	free(buffer);
	pthread_mutex_unlock(&mdres->mutex);

	return search_for_chunk_blocks(mdres, chunk_root_bytenr, 0);
}
2372
2373 static int range_contains_super(u64 physical, u64 bytes)
2374 {
2375         u64 super_bytenr;
2376         int i;
2377
2378         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2379                 super_bytenr = btrfs_sb_offset(i);
2380                 if (super_bytenr >= physical &&
2381                     super_bytenr < physical + bytes)
2382                         return 1;
2383         }
2384
2385         return 0;
2386 }
2387
2388 static void remap_overlapping_chunks(struct mdrestore_struct *mdres)
2389 {
2390         struct fs_chunk *fs_chunk;
2391
2392         while (!list_empty(&mdres->overlapping_chunks)) {
2393                 fs_chunk = list_first_entry(&mdres->overlapping_chunks,
2394                                             struct fs_chunk, list);
2395                 list_del_init(&fs_chunk->list);
2396                 if (range_contains_super(fs_chunk->physical,
2397                                          fs_chunk->bytes)) {
2398                         warning(
2399 "remapping a chunk that had a super mirror inside of it, clearing space cache so we don't end up with corruption");
2400                         mdres->clear_space_cache = 1;
2401                 }
2402                 fs_chunk->physical = mdres->last_physical_offset;
2403                 tree_insert(&mdres->physical_tree, &fs_chunk->p, physical_cmp);
2404                 mdres->last_physical_offset += fs_chunk->bytes;
2405         }
2406 }
2407
2408 static int fixup_devices(struct btrfs_fs_info *fs_info,
2409                          struct mdrestore_struct *mdres, off_t dev_size)
2410 {
2411         struct btrfs_trans_handle *trans;
2412         struct btrfs_dev_item *dev_item;
2413         struct btrfs_path path;
2414         struct extent_buffer *leaf;
2415         struct btrfs_root *root = fs_info->chunk_root;
2416         struct btrfs_key key;
2417         u64 devid, cur_devid;
2418         int ret;
2419
2420         trans = btrfs_start_transaction(fs_info->tree_root, 1);
2421         if (IS_ERR(trans)) {
2422                 error("cannot starting transaction %ld", PTR_ERR(trans));
2423                 return PTR_ERR(trans);
2424         }
2425
2426         dev_item = &fs_info->super_copy->dev_item;
2427
2428         devid = btrfs_stack_device_id(dev_item);
2429
2430         btrfs_set_stack_device_total_bytes(dev_item, dev_size);
2431         btrfs_set_stack_device_bytes_used(dev_item, mdres->alloced_chunks);
2432
2433         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2434         key.type = BTRFS_DEV_ITEM_KEY;
2435         key.offset = 0;
2436
2437         btrfs_init_path(&path);
2438
2439 again:
2440         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
2441         if (ret < 0) {
2442                 error("search failed: %d", ret);
2443                 exit(1);
2444         }
2445
2446         while (1) {
2447                 leaf = path.nodes[0];
2448                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2449                         ret = btrfs_next_leaf(root, &path);
2450                         if (ret < 0) {
2451                                 error("cannot go to next leaf %d", ret);
2452                                 exit(1);
2453                         }
2454                         if (ret > 0) {
2455                                 ret = 0;
2456                                 break;
2457                         }
2458                         leaf = path.nodes[0];
2459                 }
2460
2461                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2462                 if (key.type > BTRFS_DEV_ITEM_KEY)
2463                         break;
2464                 if (key.type != BTRFS_DEV_ITEM_KEY) {
2465                         path.slots[0]++;
2466                         continue;
2467                 }
2468
2469                 dev_item = btrfs_item_ptr(leaf, path.slots[0],
2470                                           struct btrfs_dev_item);
2471                 cur_devid = btrfs_device_id(leaf, dev_item);
2472                 if (devid != cur_devid) {
2473                         ret = btrfs_del_item(trans, root, &path);
2474                         if (ret) {
2475                                 error("cannot delete item: %d", ret);
2476                                 exit(1);
2477                         }
2478                         btrfs_release_path(&path);
2479                         goto again;
2480                 }
2481
2482                 btrfs_set_device_total_bytes(leaf, dev_item, dev_size);
2483                 btrfs_set_device_bytes_used(leaf, dev_item,
2484                                             mdres->alloced_chunks);
2485                 btrfs_mark_buffer_dirty(leaf);
2486                 path.slots[0]++;
2487         }
2488
2489         btrfs_release_path(&path);
2490         ret = btrfs_commit_transaction(trans, fs_info->tree_root);
2491         if (ret) {
2492                 error("unable to commit transaction: %d", ret);
2493                 return ret;
2494         }
2495         return 0;
2496 }
2497
2498 static int restore_metadump(const char *input, FILE *out, int old_restore,
2499                             int num_threads, int fixup_offset,
2500                             const char *target, int multi_devices)
2501 {
2502         struct meta_cluster *cluster = NULL;
2503         struct meta_cluster_header *header;
2504         struct mdrestore_struct mdrestore;
2505         struct btrfs_fs_info *info = NULL;
2506         u64 bytenr = 0;
2507         FILE *in = NULL;
2508         int ret = 0;
2509
2510         if (!strcmp(input, "-")) {
2511                 in = stdin;
2512         } else {
2513                 in = fopen(input, "r");
2514                 if (!in) {
2515                         error("unable to open metadump image: %s",
2516                                         strerror(errno));
2517                         return 1;
2518                 }
2519         }
2520
2521         /* NOTE: open with write mode */
2522         if (fixup_offset) {
2523                 info = open_ctree_fs_info(target, 0, 0, 0,
2524                                           OPEN_CTREE_WRITES |
2525                                           OPEN_CTREE_RESTORE |
2526                                           OPEN_CTREE_PARTIAL);
2527                 if (!info) {
2528                         error("open ctree failed");
2529                         ret = -EIO;
2530                         goto failed_open;
2531                 }
2532         }
2533
2534         cluster = malloc(BLOCK_SIZE);
2535         if (!cluster) {
2536                 error("not enough memory for cluster");
2537                 ret = -ENOMEM;
2538                 goto failed_info;
2539         }
2540
2541         ret = mdrestore_init(&mdrestore, in, out, old_restore, num_threads,
2542                              fixup_offset, info, multi_devices);
2543         if (ret) {
2544                 error("failed to initialize metadata restore state: %d", ret);
2545                 goto failed_cluster;
2546         }
2547
2548         if (!multi_devices && !old_restore) {
2549                 ret = build_chunk_tree(&mdrestore, cluster);
2550                 if (ret)
2551                         goto out;
2552                 if (!list_empty(&mdrestore.overlapping_chunks))
2553                         remap_overlapping_chunks(&mdrestore);
2554         }
2555
2556         if (in != stdin && fseek(in, 0, SEEK_SET)) {
2557                 error("seek failed: %s", strerror(errno));
2558                 goto out;
2559         }
2560
2561         while (!mdrestore.error) {
2562                 ret = fread(cluster, BLOCK_SIZE, 1, in);
2563                 if (!ret)
2564                         break;
2565
2566                 header = &cluster->header;
2567                 if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
2568                     le64_to_cpu(header->bytenr) != bytenr) {
2569                         error("bad header in metadump image");
2570                         ret = -EIO;
2571                         break;
2572                 }
2573                 ret = add_cluster(cluster, &mdrestore, &bytenr);
2574                 if (ret) {
2575                         error("failed to add cluster: %d", ret);
2576                         break;
2577                 }
2578         }
2579         ret = wait_for_worker(&mdrestore);
2580
2581         if (!ret && !multi_devices && !old_restore) {
2582                 struct btrfs_root *root;
2583                 struct stat st;
2584
2585                 root = open_ctree_fd(fileno(out), target, 0,
2586                                           OPEN_CTREE_PARTIAL |
2587                                           OPEN_CTREE_WRITES |
2588                                           OPEN_CTREE_NO_DEVICES);
2589                 if (!root) {
2590                         error("open ctree failed in %s", target);
2591                         ret = -EIO;
2592                         goto out;
2593                 }
2594                 info = root->fs_info;
2595
2596                 if (stat(target, &st)) {
2597                         error("stat %s failed: %s", target, strerror(errno));
2598                         close_ctree(info->chunk_root);
2599                         free(cluster);
2600                         return 1;
2601                 }
2602
2603                 ret = fixup_devices(info, &mdrestore, st.st_size);
2604                 close_ctree(info->chunk_root);
2605                 if (ret)
2606                         goto out;
2607         }
2608 out:
2609         mdrestore_destroy(&mdrestore, num_threads);
2610 failed_cluster:
2611         free(cluster);
2612 failed_info:
2613         if (fixup_offset && info)
2614                 close_ctree(info->chunk_root);
2615 failed_open:
2616         if (in != stdin)
2617                 fclose(in);
2618         return ret;
2619 }
2620
2621 static int update_disk_super_on_device(struct btrfs_fs_info *info,
2622                                        const char *other_dev, u64 cur_devid)
2623 {
2624         struct btrfs_key key;
2625         struct extent_buffer *leaf;
2626         struct btrfs_path path;
2627         struct btrfs_dev_item *dev_item;
2628         struct btrfs_super_block *disk_super;
2629         char dev_uuid[BTRFS_UUID_SIZE];
2630         char fs_uuid[BTRFS_UUID_SIZE];
2631         u64 devid, type, io_align, io_width;
2632         u64 sector_size, total_bytes, bytes_used;
2633         char buf[BTRFS_SUPER_INFO_SIZE];
2634         int fp = -1;
2635         int ret;
2636
2637         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2638         key.type = BTRFS_DEV_ITEM_KEY;
2639         key.offset = cur_devid;
2640
2641         btrfs_init_path(&path);
2642         ret = btrfs_search_slot(NULL, info->chunk_root, &key, &path, 0, 0); 
2643         if (ret) {
2644                 error("search key failed: %d", ret);
2645                 ret = -EIO;
2646                 goto out;
2647         }
2648
2649         leaf = path.nodes[0];
2650         dev_item = btrfs_item_ptr(leaf, path.slots[0],
2651                                   struct btrfs_dev_item);
2652
2653         devid = btrfs_device_id(leaf, dev_item);
2654         if (devid != cur_devid) {
2655                 error("devid mismatch: %llu != %llu",
2656                                 (unsigned long long)devid,
2657                                 (unsigned long long)cur_devid);
2658                 ret = -EIO;
2659                 goto out;
2660         }
2661
2662         type = btrfs_device_type(leaf, dev_item);
2663         io_align = btrfs_device_io_align(leaf, dev_item);
2664         io_width = btrfs_device_io_width(leaf, dev_item);
2665         sector_size = btrfs_device_sector_size(leaf, dev_item);
2666         total_bytes = btrfs_device_total_bytes(leaf, dev_item);
2667         bytes_used = btrfs_device_bytes_used(leaf, dev_item);
2668         read_extent_buffer(leaf, dev_uuid, (unsigned long)btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE);
2669         read_extent_buffer(leaf, fs_uuid, (unsigned long)btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE);
2670
2671         btrfs_release_path(&path);
2672
2673         printf("update disk super on %s devid=%llu\n", other_dev, devid);
2674
2675         /* update other devices' super block */
2676         fp = open(other_dev, O_CREAT | O_RDWR, 0600);
2677         if (fp < 0) {
2678                 error("could not open %s: %s", other_dev, strerror(errno));
2679                 ret = -EIO;
2680                 goto out;
2681         }
2682
2683         memcpy(buf, info->super_copy, BTRFS_SUPER_INFO_SIZE);
2684
2685         disk_super = (struct btrfs_super_block *)buf;
2686         dev_item = &disk_super->dev_item;
2687
2688         btrfs_set_stack_device_type(dev_item, type);
2689         btrfs_set_stack_device_id(dev_item, devid);
2690         btrfs_set_stack_device_total_bytes(dev_item, total_bytes);
2691         btrfs_set_stack_device_bytes_used(dev_item, bytes_used);
2692         btrfs_set_stack_device_io_align(dev_item, io_align);
2693         btrfs_set_stack_device_io_width(dev_item, io_width);
2694         btrfs_set_stack_device_sector_size(dev_item, sector_size);
2695         memcpy(dev_item->uuid, dev_uuid, BTRFS_UUID_SIZE);
2696         memcpy(dev_item->fsid, fs_uuid, BTRFS_UUID_SIZE);
2697         csum_block((u8 *)buf, BTRFS_SUPER_INFO_SIZE);
2698
2699         ret = pwrite64(fp, buf, BTRFS_SUPER_INFO_SIZE, BTRFS_SUPER_INFO_OFFSET);
2700         if (ret != BTRFS_SUPER_INFO_SIZE) {
2701                 if (ret < 0)
2702                         error("cannot write superblock: %s", strerror(ret));
2703                 else
2704                         error("cannot write superblock");
2705                 ret = -EIO;
2706                 goto out;
2707         }
2708
2709         write_backup_supers(fp, (u8 *)buf);
2710
2711 out:
2712         if (fp != -1)
2713                 close(fp);
2714         return ret;
2715 }
2716
/* Print usage text and exit with @ret; aligned columns for all options */
static void print_usage(int ret)
{
	printf("usage: btrfs-image [options] source target\n");
	printf("\t-r      \trestore metadump image\n");
	printf("\t-c value\tcompression level (0 ~ 9)\n");
	printf("\t-t value\tnumber of threads (1 ~ 32)\n");
	printf("\t-o      \tdon't mess with the chunk tree when restoring\n");
	printf("\t-s      \tsanitize file names, use once to just use garbage, use twice if you want crc collisions\n");
	printf("\t-w      \twalk all trees instead of using extent tree, do this if your extent tree is broken\n");
	printf("\t-m      \trestore for multiple devices\n");
	printf("\n");
	printf("\tIn the dump mode, source is the btrfs device and target is the output file (use '-' for stdout).\n");
	printf("\tIn the restore mode, source is the dumped image and target is the btrfs device/file.\n");
	exit(ret);
}
2732
2733 int main(int argc, char *argv[])
2734 {
2735         char *source;
2736         char *target;
2737         u64 num_threads = 0;
2738         u64 compress_level = 0;
2739         int create = 1;
2740         int old_restore = 0;
2741         int walk_trees = 0;
2742         int multi_devices = 0;
2743         int ret;
2744         int sanitize = 0;
2745         int dev_cnt = 0;
2746         int usage_error = 0;
2747         FILE *out;
2748
2749         while (1) {
2750                 static const struct option long_options[] = {
2751                         { "help", no_argument, NULL, GETOPT_VAL_HELP},
2752                         { NULL, 0, NULL, 0 }
2753                 };
2754                 int c = getopt_long(argc, argv, "rc:t:oswm", long_options, NULL);
2755                 if (c < 0)
2756                         break;
2757                 switch (c) {
2758                 case 'r':
2759                         create = 0;
2760                         break;
2761                 case 't':
2762                         num_threads = arg_strtou64(optarg);
2763                         if (num_threads > MAX_WORKER_THREADS) {
2764                                 error("number of threads out of range: %llu > %d",
2765                                         (unsigned long long)num_threads,
2766                                         MAX_WORKER_THREADS);
2767                                 return 1;
2768                         }
2769                         break;
2770                 case 'c':
2771                         compress_level = arg_strtou64(optarg);
2772                         if (compress_level > 9) {
2773                                 error("compression level out of range: %llu",
2774                                         (unsigned long long)compress_level);
2775                                 return 1;
2776                         }
2777                         break;
2778                 case 'o':
2779                         old_restore = 1;
2780                         break;
2781                 case 's':
2782                         sanitize++;
2783                         break;
2784                 case 'w':
2785                         walk_trees = 1;
2786                         break;
2787                 case 'm':
2788                         create = 0;
2789                         multi_devices = 1;
2790                         break;
2791                         case GETOPT_VAL_HELP:
2792                 default:
2793                         print_usage(c != GETOPT_VAL_HELP);
2794                 }
2795         }
2796
2797         set_argv0(argv);
2798         if (check_argc_min(argc - optind, 2))
2799                 print_usage(1);
2800
2801         dev_cnt = argc - optind - 1;
2802
2803         if (create) {
2804                 if (old_restore) {
2805                         error(
2806                         "create and restore cannot be used at the same time");
2807                         usage_error++;
2808                 }
2809         } else {
2810                 if (walk_trees || sanitize || compress_level) {
2811                         error(
2812                         "useing -w, -s, -c options for restore makes no sense");
2813                         usage_error++;
2814                 }
2815                 if (multi_devices && dev_cnt < 2) {
2816                         error("not enough devices specified for -m option");
2817                         usage_error++;
2818                 }
2819                 if (!multi_devices && dev_cnt != 1) {
2820                         error("accepts only 1 device without -m option");
2821                         usage_error++;
2822                 }
2823         }
2824
2825         if (usage_error)
2826                 print_usage(1);
2827
2828         source = argv[optind];
2829         target = argv[optind + 1];
2830
2831         if (create && !strcmp(target, "-")) {
2832                 out = stdout;
2833         } else {
2834                 out = fopen(target, "w+");
2835                 if (!out) {
2836                         error("unable to create target file %s", target);
2837                         exit(1);
2838                 }
2839         }
2840
2841         if (compress_level > 0 || create == 0) {
2842                 if (num_threads == 0) {
2843                         long tmp = sysconf(_SC_NPROCESSORS_ONLN);
2844
2845                         if (tmp <= 0)
2846                                 tmp = 1;
2847                         num_threads = tmp;
2848                 }
2849         } else {
2850                 num_threads = 0;
2851         }
2852
2853         if (create) {
2854                 ret = check_mounted(source);
2855                 if (ret < 0) {
2856                         warning("unable to check mount status of: %s",
2857                                         strerror(-ret));
2858                 } else if (ret) {
2859                         warning("%s already mounted, results may be inaccurate",
2860                                         source);
2861                 }
2862
2863                 ret = create_metadump(source, out, num_threads,
2864                                       compress_level, sanitize, walk_trees);
2865         } else {
2866                 ret = restore_metadump(source, out, old_restore, num_threads,
2867                                        0, target, multi_devices);
2868         }
2869         if (ret) {
2870                 error("%s failed: %s", (create) ? "create" : "restore",
2871                        strerror(errno));
2872                 goto out;
2873         }
2874
2875          /* extended support for multiple devices */
2876         if (!create && multi_devices) {
2877                 struct btrfs_fs_info *info;
2878                 u64 total_devs;
2879                 int i;
2880
2881                 info = open_ctree_fs_info(target, 0, 0, 0,
2882                                           OPEN_CTREE_PARTIAL |
2883                                           OPEN_CTREE_RESTORE);
2884                 if (!info) {
2885                         error("open ctree failed at %s", target);
2886                         return 1;
2887                 }
2888
2889                 total_devs = btrfs_super_num_devices(info->super_copy);
2890                 if (total_devs != dev_cnt) {
2891                         error("it needs %llu devices but has only %d",
2892                                 total_devs, dev_cnt);
2893                         close_ctree(info->chunk_root);
2894                         goto out;
2895                 }
2896
2897                 /* update super block on other disks */
2898                 for (i = 2; i <= dev_cnt; i++) {
2899                         ret = update_disk_super_on_device(info,
2900                                         argv[optind + i], (u64)i);
2901                         if (ret) {
2902                                 error("update disk superblock failed devid %d: %d",
2903                                         i, ret);
2904                                 close_ctree(info->chunk_root);
2905                                 exit(1);
2906                         }
2907                 }
2908
2909                 close_ctree(info->chunk_root);
2910
2911                 /* fix metadata block to map correct chunk */
2912                 ret = restore_metadump(source, out, 0, num_threads, 1,
2913                                        target, 1);
2914                 if (ret) {
2915                         error("unable to fixup metadump: %d", ret);
2916                         exit(1);
2917                 }
2918         }
2919 out:
2920         if (out == stdout) {
2921                 fflush(out);
2922         } else {
2923                 fclose(out);
2924                 if (ret && create) {
2925                         int unlink_ret;
2926
2927                         unlink_ret = unlink(target);
2928                         if (unlink_ret)
2929                                 error("unlink output file %s failed: %s",
2930                                                 target, strerror(errno));
2931                 }
2932         }
2933
2934         btrfs_close_all_devices();
2935
2936         return !!ret;
2937 }