btrfs-progs: add OPEN_CTREE_INVALIDATE_FST flag
[platform/upstream/btrfs-progs.git] / btrfs-image.c
1 /*
2  * Copyright (C) 2008 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <pthread.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <fcntl.h>
25 #include <unistd.h>
26 #include <dirent.h>
27 #include <zlib.h>
28 #include <getopt.h>
29
30 #include "kerncompat.h"
31 #include "crc32c.h"
32 #include "ctree.h"
33 #include "disk-io.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "volumes.h"
37 #include "extent_io.h"
38
39 #define HEADER_MAGIC            0xbd5c25e27295668bULL
40 #define MAX_PENDING_SIZE        (256 * 1024)
41 #define BLOCK_SIZE              1024
42 #define BLOCK_MASK              (BLOCK_SIZE - 1)
43
44 #define COMPRESS_NONE           0
45 #define COMPRESS_ZLIB           1
46
47 #define MAX_WORKER_THREADS      (32)
48
/* One index entry in a metadump cluster */
struct meta_cluster_item {
	__le64 bytenr;		/* logical start of the stored extent */
	__le32 size;		/* stored (possibly compressed) byte count */
} __attribute__ ((__packed__));

/* Header at the start of every BLOCK_SIZE cluster of the image stream */
struct meta_cluster_header {
	__le64 magic;		/* HEADER_MAGIC */
	__le64 bytenr;		/* stream offset of this cluster */
	__le32 nritems;		/* number of index items that follow */
	u8 compress;		/* COMPRESS_NONE or COMPRESS_ZLIB */
} __attribute__ ((__packed__));

/* cluster header + index items + buffers */
struct meta_cluster {
	struct meta_cluster_header header;
	struct meta_cluster_item items[];
} __attribute__ ((__packed__));

/* How many index items fit in the fixed-size cluster block */
#define ITEMS_PER_CLUSTER ((BLOCK_SIZE - sizeof(struct meta_cluster)) / \
			   sizeof(struct meta_cluster_item))
69
/* Mapping of one chunk from the logical address space to a device offset */
struct fs_chunk {
	u64 logical;		/* logical start of the chunk */
	u64 physical;		/* physical start on the device */
	/*
	 * physical_dup only stores the additional physical for
	 * BTRFS_BLOCK_GROUP_DUP; currently restore only supports single
	 * and DUP.
	 * TODO: modify this structure and the functions related to this
	 * structure to support RAID*
	 */
	u64 physical_dup;
	u64 bytes;		/* chunk length in bytes */
	struct rb_node l;	/* node in chunk_tree, keyed by logical */
	struct rb_node p;	/* node in physical_tree, keyed by physical */
	struct list_head list;
};
85
/* One extent queued for (optional) compression and writeout */
struct async_work {
	struct list_head list;		/* work queue, metadump_struct.list */
	struct list_head ordered;	/* writeout order, metadump_struct.ordered */
	u64 start;			/* logical start of the extent */
	u64 size;			/* uncompressed byte count */
	u8 *buffer;			/* data; workers may swap in a compressed copy */
	size_t bufsize;			/* current size of @buffer */
	int error;			/* set by a worker when compression failed */
};
95
/* State for creating an image (dump side) */
struct metadump_struct {
	struct btrfs_root *root;
	FILE *out;

	/* cluster currently being filled, padded to one BLOCK_SIZE block */
	union {
		struct meta_cluster cluster;
		char meta_cluster_bytes[BLOCK_SIZE];
	};

	pthread_t threads[MAX_WORKER_THREADS];
	size_t num_threads;
	pthread_mutex_t mutex;
	pthread_cond_t cond;
	struct rb_root name_tree;	/* cache of sanitized names (struct name) */

	struct list_head list;		/* work pending for the compressor threads */
	struct list_head ordered;	/* writeout order of the queued items */
	size_t num_items;		/* items queued in the current cluster */
	size_t num_ready;		/* items finished by the workers */

	u64 pending_start;		/* logical start of the coalesced run */
	u64 pending_size;		/* bytes accumulated in the run */

	int compress_level;		/* zlib level; 0 means no compression */
	int done;			/* tells the workers to exit */
	int data;			/* also dump file data extents */
	int sanitize_names;		/* 1: random garbage, >1: crc32c collisions */

	int error;			/* first error reported by a worker */
};
126
/* Cache entry mapping an original name to its sanitized substitute */
struct name {
	struct rb_node n;	/* node in metadump_struct.name_tree */
	char *val;		/* original name (not NUL terminated) */
	char *sub;		/* substitute string of the same length */
	u32 len;		/* length of both val and sub */
};
133
/* State for restoring an image (restore side) */
struct mdrestore_struct {
	FILE *in;
	FILE *out;

	pthread_t threads[MAX_WORKER_THREADS];
	size_t num_threads;
	pthread_mutex_t mutex;
	pthread_cond_t cond;

	struct rb_root chunk_tree;	/* fs_chunk entries keyed by logical */
	struct rb_root physical_tree;	/* fs_chunk entries keyed by physical */
	struct list_head list;		/* pending restore work */
	struct list_head overlapping_chunks;
	size_t num_items;
	u32 nodesize;
	u64 devid;
	u64 alloced_chunks;
	u64 last_physical_offset;
	u8 uuid[BTRFS_UUID_SIZE];
	u8 fsid[BTRFS_FSID_SIZE];

	int compress_method;		/* COMPRESS_* constant */
	int done;
	int error;
	int old_restore;		/* NOTE(review): presumably the pre-chunk image format -- confirm */
	int fixup_offset;
	int multi_devices;
	int clear_space_cache;
	struct btrfs_fs_info *info;
};
164
165 static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
166                                    u64 search, u64 cluster_bytenr);
167 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size);
168
169 static void csum_block(u8 *buf, size_t len)
170 {
171         u8 result[BTRFS_CRC32_SIZE];
172         u32 crc = ~(u32)0;
173         crc = crc32c(crc, buf + BTRFS_CSUM_SIZE, len - BTRFS_CSUM_SIZE);
174         btrfs_csum_final(crc, result);
175         memcpy(buf, result, BTRFS_CRC32_SIZE);
176 }
177
178 static int has_name(struct btrfs_key *key)
179 {
180         switch (key->type) {
181         case BTRFS_DIR_ITEM_KEY:
182         case BTRFS_DIR_INDEX_KEY:
183         case BTRFS_INODE_REF_KEY:
184         case BTRFS_INODE_EXTREF_KEY:
185         case BTRFS_XATTR_ITEM_KEY:
186                 return 1;
187         default:
188                 break;
189         }
190
191         return 0;
192 }
193
194 static char *generate_garbage(u32 name_len)
195 {
196         char *buf = malloc(name_len);
197         int i;
198
199         if (!buf)
200                 return NULL;
201
202         for (i = 0; i < name_len; i++) {
203                 char c = rand_range(94) + 33;
204
205                 if (c == '/')
206                         c++;
207                 buf[i] = c;
208         }
209
210         return buf;
211 }
212
213 static int name_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
214 {
215         struct name *entry = rb_entry(a, struct name, n);
216         struct name *ins = rb_entry(b, struct name, n);
217         u32 len;
218
219         len = min(ins->len, entry->len);
220         return memcmp(ins->val, entry->val, len);
221 }
222
223 static int chunk_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
224 {
225         struct fs_chunk *entry = rb_entry(a, struct fs_chunk, l);
226         struct fs_chunk *ins = rb_entry(b, struct fs_chunk, l);
227
228         if (fuzz && ins->logical >= entry->logical &&
229             ins->logical < entry->logical + entry->bytes)
230                 return 0;
231
232         if (ins->logical < entry->logical)
233                 return -1;
234         else if (ins->logical > entry->logical)
235                 return 1;
236         return 0;
237 }
238
239 static int physical_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
240 {
241         struct fs_chunk *entry = rb_entry(a, struct fs_chunk, p);
242         struct fs_chunk *ins = rb_entry(b, struct fs_chunk, p);
243
244         if (fuzz && ins->physical >= entry->physical &&
245             ins->physical < entry->physical + entry->bytes)
246                 return 0;
247
248         if (fuzz && entry->physical >= ins->physical &&
249             entry->physical < ins->physical + ins->bytes)
250                 return 0;
251
252         if (ins->physical < entry->physical)
253                 return -1;
254         else if (ins->physical > entry->physical)
255                 return 1;
256         return 0;
257 }
258
259 static void tree_insert(struct rb_root *root, struct rb_node *ins,
260                         int (*cmp)(struct rb_node *a, struct rb_node *b,
261                                    int fuzz))
262 {
263         struct rb_node ** p = &root->rb_node;
264         struct rb_node * parent = NULL;
265         int dir;
266
267         while(*p) {
268                 parent = *p;
269
270                 dir = cmp(*p, ins, 1);
271                 if (dir < 0)
272                         p = &(*p)->rb_left;
273                 else if (dir > 0)
274                         p = &(*p)->rb_right;
275                 else
276                         BUG();
277         }
278
279         rb_link_node(ins, parent, p);
280         rb_insert_color(ins, root);
281 }
282
283 static struct rb_node *tree_search(struct rb_root *root,
284                                    struct rb_node *search,
285                                    int (*cmp)(struct rb_node *a,
286                                               struct rb_node *b, int fuzz),
287                                    int fuzz)
288 {
289         struct rb_node *n = root->rb_node;
290         int dir;
291
292         while (n) {
293                 dir = cmp(n, search, fuzz);
294                 if (dir < 0)
295                         n = n->rb_left;
296                 else if (dir > 0)
297                         n = n->rb_right;
298                 else
299                         return n;
300         }
301
302         return NULL;
303 }
304
/*
 * Translate a logical byte address to its physical device offset using
 * the chunk tree rebuilt from the image.
 *
 * @mdres:        restore context holding chunk_tree
 * @logical:      logical address to translate
 * @size:         in/out; clamped so the range does not cross the end of
 *                the containing chunk
 * @physical_dup: out, may be NULL; second copy's offset for DUP chunks,
 *                0 otherwise
 *
 * The superblock offset is identity-mapped.  When no chunk covers
 * @logical the address is returned unchanged, with a warning unless
 * reading from stdin.
 */
static u64 logical_to_physical(struct mdrestore_struct *mdres, u64 logical,
			       u64 *size, u64 *physical_dup)
{
	struct fs_chunk *fs_chunk;
	struct rb_node *entry;
	struct fs_chunk search;
	u64 offset;

	/* the super block is never remapped */
	if (logical == BTRFS_SUPER_INFO_OFFSET)
		return logical;

	search.logical = logical;
	entry = tree_search(&mdres->chunk_tree, &search.l, chunk_cmp, 1);
	if (!entry) {
		if (mdres->in != stdin)
			warning("cannot find a chunk, using logical");
		return logical;
	}
	fs_chunk = rb_entry(entry, struct fs_chunk, l);
	/* fuzzy search guarantees the chunk covers @logical */
	if (fs_chunk->logical > logical || fs_chunk->logical + fs_chunk->bytes < logical)
		BUG();
	offset = search.logical - fs_chunk->logical;

	if (physical_dup) {
		/* Only in dup case, physical_dup is not equal to 0 */
		if (fs_chunk->physical_dup)
			*physical_dup = fs_chunk->physical_dup + offset;
		else
			*physical_dup = 0;
	}

	/* clamp *size so it stays within this chunk */
	*size = min(*size, fs_chunk->bytes + fs_chunk->logical - logical);
	return fs_chunk->physical + offset;
}
339
340
/*
 * Find a substitute string of the same length whose crc32c matches
 * @name, so sanitized names still hash to the same directory index.
 * Results are cached in md->name_tree.
 *
 * Takes ownership of @name: it is either stored in the cache or freed.
 * Returns the substitute (owned by the cache, caller must not free it)
 * or NULL on allocation failure.
 */
static char *find_collision(struct metadump_struct *md, char *name,
			    u32 name_len)
{
	struct name *val;
	struct rb_node *entry;
	struct name tmp;
	unsigned long checksum;
	int found = 0;
	int i;

	/* already sanitized this name? */
	tmp.val = name;
	tmp.len = name_len;
	entry = tree_search(&md->name_tree, &tmp.n, name_cmp, 0);
	if (entry) {
		val = rb_entry(entry, struct name, n);
		free(name);
		return val->sub;
	}

	val = malloc(sizeof(struct name));
	if (!val) {
		error("cannot sanitize name, not enough memory");
		free(name);
		return NULL;
	}

	memset(val, 0, sizeof(*val));

	val->val = name;
	val->len = name_len;
	val->sub = malloc(name_len);
	if (!val->sub) {
		error("cannot sanitize name, not enough memory");
		free(val);
		free(name);
		return NULL;
	}

	/*
	 * Brute-force search: treat sub[] as a multi-digit counter over
	 * bytes ' '..127 (skipping '/') and enumerate candidates until
	 * one matches the checksum while differing from the original.
	 */
	checksum = crc32c(~1, val->val, name_len);
	memset(val->sub, ' ', name_len);
	i = 0;
	while (1) {
		if (crc32c(~1, val->sub, name_len) == checksum &&
		    memcmp(val->sub, val->val, val->len)) {
			found = 1;
			break;
		}

		if (val->sub[i] == 127) {
			/* digit overflow: carry into the next position */
			do {
				i++;
				if (i >= name_len)
					break;
			} while (val->sub[i] == 127);

			/* counter exhausted, no collision exists */
			if (i >= name_len)
				break;
			val->sub[i]++;
			if (val->sub[i] == '/')
				val->sub[i]++;
			memset(val->sub, ' ', i);
			i = 0;
			continue;
		} else {
			val->sub[i]++;
			if (val->sub[i] == '/')
				val->sub[i]++;
		}
	}

	if (!found) {
		warning(
"cannot find a hash collision for '%.*s', generating garbage, it won't match indexes",
			val->len, val->val);
		/* fall back to random printable characters */
		for (i = 0; i < name_len; i++) {
			char c = rand_range(94) + 33;

			if (c == '/')
				c++;
			val->sub[i] = c;
		}
	}

	tree_insert(&md->name_tree, &val->n, name_cmp);
	return val->sub;
}
427
/*
 * Replace every name stored in the dir item at @slot (an item may hold
 * several entries back to back) with random garbage, or with a
 * crc32c-colliding substitute when md->sanitize_names > 1.
 */
static void sanitize_dir_item(struct metadump_struct *md, struct extent_buffer *eb,
			      int slot)
{
	struct btrfs_dir_item *dir_item;
	char *buf;
	char *garbage;
	unsigned long name_ptr;
	u32 total_len;
	u32 cur = 0;
	u32 this_len;
	u32 name_len;
	/* collision substitutes are owned by md->name_tree, don't free them */
	int free_garbage = (md->sanitize_names == 1);

	dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
	total_len = btrfs_item_size_nr(eb, slot);
	while (cur < total_len) {
		/* length of this entry: header + name + data */
		this_len = sizeof(*dir_item) +
			btrfs_dir_name_len(eb, dir_item) +
			btrfs_dir_data_len(eb, dir_item);
		name_ptr = (unsigned long)(dir_item + 1);
		name_len = btrfs_dir_name_len(eb, dir_item);

		if (md->sanitize_names > 1) {
			/* find_collision() takes ownership of buf */
			buf = malloc(name_len);
			if (!buf) {
				error("cannot sanitize name, not enough memory");
				return;
			}
			read_extent_buffer(eb, buf, name_ptr, name_len);
			garbage = find_collision(md, buf, name_len);
		} else {
			garbage = generate_garbage(name_len);
		}
		if (!garbage) {
			error("cannot sanitize name, not enough memory");
			return;
		}
		write_extent_buffer(eb, garbage, name_ptr, name_len);
		cur += this_len;
		dir_item = (struct btrfs_dir_item *)((char *)dir_item +
						     this_len);
		if (free_garbage)
			free(garbage);
	}
}
473
/*
 * Replace every name stored in the INODE_REF (@ext == 0) or
 * INODE_EXTREF (@ext != 0) item at @slot with random garbage, or with a
 * crc32c-colliding substitute when md->sanitize_names > 1.
 */
static void sanitize_inode_ref(struct metadump_struct *md,
			       struct extent_buffer *eb, int slot, int ext)
{
	struct btrfs_inode_extref *extref;
	struct btrfs_inode_ref *ref;
	char *garbage, *buf;
	unsigned long ptr;
	unsigned long name_ptr;
	u32 item_size;
	u32 cur_offset = 0;
	int len;
	/* collision substitutes are owned by md->name_tree, don't free them */
	int free_garbage = (md->sanitize_names == 1);

	item_size = btrfs_item_size_nr(eb, slot);
	ptr = btrfs_item_ptr_offset(eb, slot);
	while (cur_offset < item_size) {
		if (ext) {
			extref = (struct btrfs_inode_extref *)(ptr +
							       cur_offset);
			name_ptr = (unsigned long)(&extref->name);
			len = btrfs_inode_extref_name_len(eb, extref);
			cur_offset += sizeof(*extref);
		} else {
			ref = (struct btrfs_inode_ref *)(ptr + cur_offset);
			len = btrfs_inode_ref_name_len(eb, ref);
			name_ptr = (unsigned long)(ref + 1);
			cur_offset += sizeof(*ref);
		}
		/* the name follows the ref structure */
		cur_offset += len;

		if (md->sanitize_names > 1) {
			/* find_collision() takes ownership of buf */
			buf = malloc(len);
			if (!buf) {
				error("cannot sanitize name, not enough memory");
				return;
			}
			read_extent_buffer(eb, buf, name_ptr, len);
			garbage = find_collision(md, buf, len);
		} else {
			garbage = generate_garbage(len);
		}

		if (!garbage) {
			error("cannot sanitize name, not enough memory");
			return;
		}
		write_extent_buffer(eb, garbage, name_ptr, len);
		if (free_garbage)
			free(garbage);
	}
}
525
526 static void sanitize_xattr(struct metadump_struct *md,
527                            struct extent_buffer *eb, int slot)
528 {
529         struct btrfs_dir_item *dir_item;
530         unsigned long data_ptr;
531         u32 data_len;
532
533         dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
534         data_len = btrfs_dir_data_len(eb, dir_item);
535
536         data_ptr = (unsigned long)((char *)(dir_item + 1) +
537                                    btrfs_dir_name_len(eb, dir_item));
538         memset_extent_buffer(eb, 0, data_ptr, data_len);
539 }
540
/*
 * Sanitize the names in the item at @slot of the copied block @dst.
 *
 * @dst is loaded into a temporary dummy extent buffer so the regular
 * extent-buffer accessors can be used on it, then copied back.  On
 * allocation failure the block is left unsanitized.
 */
static void sanitize_name(struct metadump_struct *md, u8 *dst,
			  struct extent_buffer *src, struct btrfs_key *key,
			  int slot)
{
	struct extent_buffer *eb;

	eb = alloc_dummy_eb(src->start, src->len);
	if (!eb) {
		error("cannot sanitize name, not enough memory");
		return;
	}

	memcpy(eb->data, dst, eb->len);

	switch (key->type) {
	case BTRFS_DIR_ITEM_KEY:
	case BTRFS_DIR_INDEX_KEY:
		sanitize_dir_item(md, eb, slot);
		break;
	case BTRFS_INODE_REF_KEY:
		sanitize_inode_ref(md, eb, slot, 0);
		break;
	case BTRFS_INODE_EXTREF_KEY:
		sanitize_inode_ref(md, eb, slot, 1);
		break;
	case BTRFS_XATTR_ITEM_KEY:
		sanitize_xattr(md, eb, slot);
		break;
	default:
		break;
	}

	memcpy(dst, eb->data, eb->len);
	free(eb);
}
576
/*
 * zero inline extents and csum items
 *
 * Walks the leaf @src and wipes, in the copy @dst: checksum items,
 * inline file extent payloads, and (when sanitizing) any names.
 */
static void zero_items(struct metadump_struct *md, u8 *dst,
		       struct extent_buffer *src)
{
	struct btrfs_file_extent_item *fi;
	struct btrfs_item *item;
	struct btrfs_key key;
	u32 nritems = btrfs_header_nritems(src);
	size_t size;
	unsigned long ptr;
	int i, extent_type;

	for (i = 0; i < nritems; i++) {
		item = btrfs_item_nr(i);
		btrfs_item_key_to_cpu(src, &key, i);
		/* data checksums: wipe the whole item payload */
		if (key.type == BTRFS_CSUM_ITEM_KEY) {
			size = btrfs_item_size_nr(src, i);
			memset(dst + btrfs_leaf_data(src) +
			       btrfs_item_offset_nr(src, i), 0, size);
			continue;
		}

		if (md->sanitize_names && has_name(&key)) {
			sanitize_name(md, dst, src, &key, i);
			continue;
		}

		if (key.type != BTRFS_EXTENT_DATA_KEY)
			continue;

		/* only inline extents carry file data inside the leaf */
		fi = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(src, fi);
		if (extent_type != BTRFS_FILE_EXTENT_INLINE)
			continue;

		ptr = btrfs_file_extent_inline_start(fi);
		size = btrfs_file_extent_inline_item_len(src, item);
		memset(dst + ptr, 0, size);
	}
}
619
/*
 * copy buffer and zero useless data in the buffer
 *
 * Copies @src into @dst, zeroes the unused regions (and sensitive data
 * via zero_items()), then recomputes the block checksum.
 */
static void copy_buffer(struct metadump_struct *md, u8 *dst,
			struct extent_buffer *src)
{
	int level;
	size_t size;
	u32 nritems;

	memcpy(dst, src->data, src->len);
	/* the super block is copied verbatim, no zeroing or re-checksumming */
	if (src->start == BTRFS_SUPER_INFO_OFFSET)
		return;

	level = btrfs_header_level(src);
	nritems = btrfs_header_nritems(src);

	if (nritems == 0) {
		/* empty block: keep only the header */
		size = sizeof(struct btrfs_header);
		memset(dst + size, 0, src->len - size);
	} else if (level == 0) {
		/* leaf: zero the gap between item headers and item data */
		size = btrfs_leaf_data(src) +
			btrfs_item_offset_nr(src, nritems - 1) -
			btrfs_item_nr_offset(nritems);
		memset(dst + btrfs_item_nr_offset(nritems), 0, size);
		zero_items(md, dst, src);
	} else {
		/* node: zero everything past the used key pointers */
		size = offsetof(struct btrfs_node, ptrs) +
			sizeof(struct btrfs_key_ptr) * nritems;
		memset(dst + size, 0, src->len - size);
	}
	/* zeroing invalidated the checksum, recompute it */
	csum_block(dst, src->len);
}
653
/*
 * Worker thread: pull queued extents off md->list and, when a
 * compression level is set, replace their buffers with a zlib
 * compressed copy.  Exits once md->done is set and the queue drains;
 * a failed allocation records -ENOMEM in md->error and exits early.
 */
static void *dump_worker(void *data)
{
	struct metadump_struct *md = (struct metadump_struct *)data;
	struct async_work *async;
	int ret;

	while (1) {
		pthread_mutex_lock(&md->mutex);
		while (list_empty(&md->list)) {
			if (md->done) {
				pthread_mutex_unlock(&md->mutex);
				goto out;
			}
			pthread_cond_wait(&md->cond, &md->mutex);
		}
		async = list_entry(md->list.next, struct async_work, list);
		list_del_init(&async->list);
		pthread_mutex_unlock(&md->mutex);

		if (md->compress_level > 0) {
			u8 *orig = async->buffer;

			async->bufsize = compressBound(async->size);
			async->buffer = malloc(async->bufsize);
			if (!async->buffer) {
				error("not enough memory for async buffer");
				pthread_mutex_lock(&md->mutex);
				/* keep the first error only */
				if (!md->error)
					md->error = -ENOMEM;
				pthread_mutex_unlock(&md->mutex);
				pthread_exit(NULL);
			}

			/*
			 * NOTE(review): compress2() expects uLongf *; the
			 * cast from &size_t assumes sizeof(size_t) ==
			 * sizeof(unsigned long) -- confirm for LLP64 targets.
			 */
			ret = compress2(async->buffer,
					 (unsigned long *)&async->bufsize,
					 orig, async->size, md->compress_level);

			if (ret != Z_OK)
				async->error = 1;

			free(orig);
		}

		pthread_mutex_lock(&md->mutex);
		md->num_ready++;
		pthread_mutex_unlock(&md->mutex);
	}
out:
	pthread_exit(NULL);
}
704
705 static void meta_cluster_init(struct metadump_struct *md, u64 start)
706 {
707         struct meta_cluster_header *header;
708
709         md->num_items = 0;
710         md->num_ready = 0;
711         header = &md->cluster.header;
712         header->magic = cpu_to_le64(HEADER_MAGIC);
713         header->bytenr = cpu_to_le64(start);
714         header->nritems = cpu_to_le32(0);
715         header->compress = md->compress_level > 0 ?
716                            COMPRESS_ZLIB : COMPRESS_NONE;
717 }
718
/*
 * Stop the workers and release everything owned by @md, including the
 * sanitized name cache.  Joins exactly @num_threads workers, so the
 * caller must pass the number of threads actually created.
 */
static void metadump_destroy(struct metadump_struct *md, int num_threads)
{
	int i;
	struct rb_node *n;

	/* wake every worker so it notices md->done and exits */
	pthread_mutex_lock(&md->mutex);
	md->done = 1;
	pthread_cond_broadcast(&md->cond);
	pthread_mutex_unlock(&md->mutex);

	for (i = 0; i < num_threads; i++)
		pthread_join(md->threads[i], NULL);

	pthread_cond_destroy(&md->cond);
	pthread_mutex_destroy(&md->mutex);

	while ((n = rb_first(&md->name_tree))) {
		struct name *name;

		name = rb_entry(n, struct name, n);
		rb_erase(n, &md->name_tree);
		free(name->val);
		free(name->sub);
		free(name);
	}
}
745
746 static int metadump_init(struct metadump_struct *md, struct btrfs_root *root,
747                          FILE *out, int num_threads, int compress_level,
748                          int sanitize_names)
749 {
750         int i, ret = 0;
751
752         memset(md, 0, sizeof(*md));
753         INIT_LIST_HEAD(&md->list);
754         INIT_LIST_HEAD(&md->ordered);
755         md->root = root;
756         md->out = out;
757         md->pending_start = (u64)-1;
758         md->compress_level = compress_level;
759         md->sanitize_names = sanitize_names;
760         if (sanitize_names > 1)
761                 crc32c_optimization_init();
762
763         md->name_tree.rb_node = NULL;
764         md->num_threads = num_threads;
765         pthread_cond_init(&md->cond, NULL);
766         pthread_mutex_init(&md->mutex, NULL);
767         meta_cluster_init(md, 0);
768
769         if (!num_threads)
770                 return 0;
771
772         for (i = 0; i < num_threads; i++) {
773                 ret = pthread_create(md->threads + i, NULL, dump_worker, md);
774                 if (ret)
775                         break;
776         }
777
778         if (ret)
779                 metadump_destroy(md, i + 1);
780
781         return ret;
782 }
783
784 static int write_zero(FILE *out, size_t size)
785 {
786         static char zero[BLOCK_SIZE];
787         return fwrite(zero, size, 1, out);
788 }
789
/*
 * Flush the current cluster: wait for the workers to finish every
 * queued item, write the index block followed by each buffer in
 * submission order, and pad the stream to the next BLOCK_SIZE boundary.
 *
 * Called with md->mutex held.  *next receives the stream offset where
 * the following cluster starts.  Returns 0 or a negative errno.
 */
static int write_buffers(struct metadump_struct *md, u64 *next)
{
	struct meta_cluster_header *header = &md->cluster.header;
	struct meta_cluster_item *item;
	struct async_work *async;
	u64 bytenr = 0;
	u32 nritems = 0;
	int ret;
	int err = 0;

	if (list_empty(&md->ordered))
		goto out;

	/* wait until all buffers are compressed */
	while (!err && md->num_items > md->num_ready) {
		struct timespec ts = {
			.tv_sec = 0,
			.tv_nsec = 10000000,
		};
		/* drop the lock while sleeping so the workers can progress */
		pthread_mutex_unlock(&md->mutex);
		nanosleep(&ts, NULL);
		pthread_mutex_lock(&md->mutex);
		err = md->error;
	}

	if (err) {
		error("one of the threads failed: %s", strerror(-err));
		goto out;
	}

	/* setup and write index block */
	list_for_each_entry(async, &md->ordered, ordered) {
		item = &md->cluster.items[nritems];
		item->bytenr = cpu_to_le64(async->start);
		item->size = cpu_to_le32(async->bufsize);
		nritems++;
	}
	header->nritems = cpu_to_le32(nritems);

	ret = fwrite(&md->cluster, BLOCK_SIZE, 1, md->out);
	if (ret != 1) {
		/*
		 * NOTE(review): returning here leaks the async_work
		 * buffers still queued on md->ordered.
		 */
		error("unable to write out cluster: %s", strerror(errno));
		return -errno;
	}

	/* write buffers */
	bytenr += le64_to_cpu(header->bytenr) + BLOCK_SIZE;
	while (!list_empty(&md->ordered)) {
		async = list_entry(md->ordered.next, struct async_work,
				   ordered);
		list_del_init(&async->ordered);

		bytenr += async->bufsize;
		/* after an error keep draining the list, but stop writing */
		if (!err)
			ret = fwrite(async->buffer, async->bufsize, 1,
				     md->out);
		if (ret != 1) {
			error("unable to write out cluster: %s",
				strerror(errno));
			err = -errno;
			ret = 0;
		}

		free(async->buffer);
		free(async);
	}

	/* zero unused space in the last block */
	if (!err && bytenr & BLOCK_MASK) {
		size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);

		bytenr += size;
		ret = write_zero(md->out, size);
		if (ret != 1) {
			error("unable to zero out buffer: %s",
				strerror(errno));
			err = -errno;
		}
	}
out:
	*next = bytenr;
	return err;
}
873
/*
 * Read async->size bytes of file data starting at logical address
 * async->start into async->buffer.
 *
 * Each mirror is tried in turn, resuming after the bytes already read
 * successfully.  Returns 0 on success or -EIO when no mirror could
 * supply the remaining bytes.
 */
static int read_data_extent(struct metadump_struct *md,
			    struct async_work *async)
{
	struct btrfs_root *root = md->root;
	u64 bytes_left = async->size;
	u64 logical = async->start;
	u64 offset = 0;
	u64 read_len;
	int num_copies;
	int cur_mirror;
	int ret;

	num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, logical,
				      bytes_left);

	/* Try our best to read data, just like read_tree_block() */
	for (cur_mirror = 0; cur_mirror < num_copies; cur_mirror++) {
		while (bytes_left) {
			read_len = bytes_left;
			ret = read_extent_data(root,
					(char *)(async->buffer + offset),
					logical, &read_len, cur_mirror);
			/* switch to the next mirror, keeping our progress */
			if (ret < 0)
				break;
			/* partial reads are fine, advance and continue */
			offset += read_len;
			logical += read_len;
			bytes_left -= read_len;
		}
	}
	if (bytes_left)
		return -EIO;
	return 0;
}
907
908 static int get_dev_fd(struct btrfs_root *root)
909 {
910         struct btrfs_device *dev;
911
912         dev = list_first_entry(&root->fs_info->fs_devices->devices,
913                                struct btrfs_device, dev_list);
914         return dev->fd;
915 }
916
917 static int flush_pending(struct metadump_struct *md, int done)
918 {
919         struct async_work *async = NULL;
920         struct extent_buffer *eb;
921         u64 blocksize = md->root->nodesize;
922         u64 start = 0;
923         u64 size;
924         size_t offset;
925         int ret = 0;
926
927         if (md->pending_size) {
928                 async = calloc(1, sizeof(*async));
929                 if (!async)
930                         return -ENOMEM;
931
932                 async->start = md->pending_start;
933                 async->size = md->pending_size;
934                 async->bufsize = async->size;
935                 async->buffer = malloc(async->bufsize);
936                 if (!async->buffer) {
937                         free(async);
938                         return -ENOMEM;
939                 }
940                 offset = 0;
941                 start = async->start;
942                 size = async->size;
943
944                 if (md->data) {
945                         ret = read_data_extent(md, async);
946                         if (ret) {
947                                 free(async->buffer);
948                                 free(async);
949                                 return ret;
950                         }
951                 }
952
953                 /*
954                  * Balance can make the mapping not cover the super block, so
955                  * just copy directly from one of the devices.
956                  */
957                 if (start == BTRFS_SUPER_INFO_OFFSET) {
958                         int fd = get_dev_fd(md->root);
959
960                         ret = pread64(fd, async->buffer, size, start);
961                         if (ret < size) {
962                                 free(async->buffer);
963                                 free(async);
964                                 error("unable to read superblock at %llu: %s",
965                                                 (unsigned long long)start,
966                                                 strerror(errno));
967                                 return -errno;
968                         }
969                         size = 0;
970                         ret = 0;
971                 }
972
973                 while (!md->data && size > 0) {
974                         u64 this_read = min(blocksize, size);
975                         eb = read_tree_block(md->root, start, this_read, 0);
976                         if (!extent_buffer_uptodate(eb)) {
977                                 free(async->buffer);
978                                 free(async);
979                                 error("unable to read metadata block %llu",
980                                         (unsigned long long)start);
981                                 return -EIO;
982                         }
983                         copy_buffer(md, async->buffer + offset, eb);
984                         free_extent_buffer(eb);
985                         start += this_read;
986                         offset += this_read;
987                         size -= this_read;
988                 }
989
990                 md->pending_start = (u64)-1;
991                 md->pending_size = 0;
992         } else if (!done) {
993                 return 0;
994         }
995
996         pthread_mutex_lock(&md->mutex);
997         if (async) {
998                 list_add_tail(&async->ordered, &md->ordered);
999                 md->num_items++;
1000                 if (md->compress_level > 0) {
1001                         list_add_tail(&async->list, &md->list);
1002                         pthread_cond_signal(&md->cond);
1003                 } else {
1004                         md->num_ready++;
1005                 }
1006         }
1007         if (md->num_items >= ITEMS_PER_CLUSTER || done) {
1008                 ret = write_buffers(md, &start);
1009                 if (ret)
1010                         error("unable to write buffers: %s", strerror(-ret));
1011                 else
1012                         meta_cluster_init(md, start);
1013         }
1014         pthread_mutex_unlock(&md->mutex);
1015         return ret;
1016 }
1017
1018 static int add_extent(u64 start, u64 size, struct metadump_struct *md,
1019                       int data)
1020 {
1021         int ret;
1022         if (md->data != data ||
1023             md->pending_size + size > MAX_PENDING_SIZE ||
1024             md->pending_start + md->pending_size != start) {
1025                 ret = flush_pending(md, 0);
1026                 if (ret)
1027                         return ret;
1028                 md->pending_start = start;
1029         }
1030         readahead_tree_block(md->root, start, size, 0);
1031         md->pending_size += size;
1032         md->data = data;
1033         return 0;
1034 }
1035
1036 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Decide whether the extent at @bytenr is a tree block by examining its
 * v0 extent ref items (the pre-flags extent tree format, where the
 * extent item itself does not say whether it is metadata).
 *
 * @path points at the extent item for @bytenr; the scan continues
 * forward from there.  Returns 1 if a ref owned by an internal tree
 * (ref objectid below BTRFS_FIRST_FREE_OBJECTID) is found, 0 otherwise,
 * or a negative errno from btrfs_next_leaf().
 */
static int is_tree_block(struct btrfs_root *extent_root,
                         struct btrfs_path *path, u64 bytenr)
{
        struct extent_buffer *leaf;
        struct btrfs_key key;
        u64 ref_objectid;
        int ret;

        leaf = path->nodes[0];
        while (1) {
                struct btrfs_extent_ref_v0 *ref_item;
                path->slots[0]++;
                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(extent_root, path);
                        if (ret < 0)
                                return ret;
                        if (ret > 0)
                                break;
                        leaf = path->nodes[0];
                }
                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                /* walked past all items belonging to this extent */
                if (key.objectid != bytenr)
                        break;
                if (key.type != BTRFS_EXTENT_REF_V0_KEY)
                        continue;
                ref_item = btrfs_item_ptr(leaf, path->slots[0],
                                          struct btrfs_extent_ref_v0);
                ref_objectid = btrfs_ref_objectid_v0(leaf, ref_item);
                /* internal-tree owner => this extent is a tree block */
                if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID)
                        return 1;
                break;
        }
        return 0;
}
1071 #endif
1072
1073 static int copy_tree_blocks(struct btrfs_root *root, struct extent_buffer *eb,
1074                             struct metadump_struct *metadump, int root_tree)
1075 {
1076         struct extent_buffer *tmp;
1077         struct btrfs_root_item *ri;
1078         struct btrfs_key key;
1079         u64 bytenr;
1080         int level;
1081         int nritems = 0;
1082         int i = 0;
1083         int ret;
1084
1085         ret = add_extent(btrfs_header_bytenr(eb), root->nodesize, metadump, 0);
1086         if (ret) {
1087                 error("unable to add metadata block %llu: %d",
1088                                 btrfs_header_bytenr(eb), ret);
1089                 return ret;
1090         }
1091
1092         if (btrfs_header_level(eb) == 0 && !root_tree)
1093                 return 0;
1094
1095         level = btrfs_header_level(eb);
1096         nritems = btrfs_header_nritems(eb);
1097         for (i = 0; i < nritems; i++) {
1098                 if (level == 0) {
1099                         btrfs_item_key_to_cpu(eb, &key, i);
1100                         if (key.type != BTRFS_ROOT_ITEM_KEY)
1101                                 continue;
1102                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
1103                         bytenr = btrfs_disk_root_bytenr(eb, ri);
1104                         tmp = read_tree_block(root, bytenr, root->nodesize, 0);
1105                         if (!extent_buffer_uptodate(tmp)) {
1106                                 error("unable to read log root block");
1107                                 return -EIO;
1108                         }
1109                         ret = copy_tree_blocks(root, tmp, metadump, 0);
1110                         free_extent_buffer(tmp);
1111                         if (ret)
1112                                 return ret;
1113                 } else {
1114                         bytenr = btrfs_node_blockptr(eb, i);
1115                         tmp = read_tree_block(root, bytenr, root->nodesize, 0);
1116                         if (!extent_buffer_uptodate(tmp)) {
1117                                 error("unable to read log root block");
1118                                 return -EIO;
1119                         }
1120                         ret = copy_tree_blocks(root, tmp, metadump, root_tree);
1121                         free_extent_buffer(tmp);
1122                         if (ret)
1123                                 return ret;
1124                 }
1125         }
1126
1127         return 0;
1128 }
1129
1130 static int copy_log_trees(struct btrfs_root *root,
1131                           struct metadump_struct *metadump,
1132                           struct btrfs_path *path)
1133 {
1134         u64 blocknr = btrfs_super_log_root(root->fs_info->super_copy);
1135
1136         if (blocknr == 0)
1137                 return 0;
1138
1139         if (!root->fs_info->log_root_tree ||
1140             !root->fs_info->log_root_tree->node) {
1141                 error("unable to copy tree log, it has not been setup");
1142                 return -EIO;
1143         }
1144
1145         return copy_tree_blocks(root, root->fs_info->log_root_tree->node,
1146                                 metadump, 1);
1147 }
1148
/*
 * Queue the data extents of the v1 free space cache inodes (stored as
 * regular file extents in the tree root) so the cache contents are
 * included in the dump.
 *
 * Scans every EXTENT_DATA item in the tree root.  Returns 0 on success
 * or a negative errno; @path is released here only on the add_extent()
 * error path, otherwise the caller releases it.
 */
static int copy_space_cache(struct btrfs_root *root,
                            struct metadump_struct *metadump,
                            struct btrfs_path *path)
{
        struct extent_buffer *leaf;
        struct btrfs_file_extent_item *fi;
        struct btrfs_key key;
        u64 bytenr, num_bytes;
        int ret;

        /* space cache inodes live in the tree root */
        root = root->fs_info->tree_root;

        key.objectid = 0;
        key.type = BTRFS_EXTENT_DATA_KEY;
        key.offset = 0;

        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0) {
                error("free space inode not found: %d", ret);
                return ret;
        }

        leaf = path->nodes[0];

        while (1) {
                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(root, path);
                        if (ret < 0) {
                                error("cannot go to next leaf %d", ret);
                                return ret;
                        }
                        if (ret > 0)
                                break;
                        leaf = path->nodes[0];
                }

                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                if (key.type != BTRFS_EXTENT_DATA_KEY) {
                        path->slots[0]++;
                        continue;
                }

                fi = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_file_extent_item);
                /* only regular extents have on-disk cache data to copy */
                if (btrfs_file_extent_type(leaf, fi) !=
                    BTRFS_FILE_EXTENT_REG) {
                        path->slots[0]++;
                        continue;
                }

                bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
                num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
                /* queue as data (last arg 1): copied raw, not sanitized */
                ret = add_extent(bytenr, num_bytes, metadump, 1);
                if (ret) {
                        error("unable to add space cache blocks %d", ret);
                        btrfs_release_path(path);
                        return ret;
                }
                path->slots[0]++;
        }

        return 0;
}
1212
/*
 * Walk the extent tree and queue every tree block extent for dumping.
 *
 * Starts just past the primary superblock (which is added separately).
 * Tree blocks are identified by BTRFS_EXTENT_FLAG_TREE_BLOCK in the
 * extent item, or — for the old v0 format without flags — by scanning
 * the extent's backrefs via is_tree_block().  Returns 0 on success or
 * a negative errno.
 */
static int copy_from_extent_tree(struct metadump_struct *metadump,
                                 struct btrfs_path *path)
{
        struct btrfs_root *extent_root;
        struct extent_buffer *leaf;
        struct btrfs_extent_item *ei;
        struct btrfs_key key;
        u64 bytenr;
        u64 num_bytes;
        int ret;

        extent_root = metadump->root->fs_info->extent_root;
        /* skip the primary superblock; it is queued by the caller */
        bytenr = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = 0;

        ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
        if (ret < 0) {
                error("extent root not found: %d", ret);
                return ret;
        }
        ret = 0;

        leaf = path->nodes[0];

        while (1) {
                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(extent_root, path);
                        if (ret < 0) {
                                error("cannot go to next leaf %d", ret);
                                break;
                        }
                        if (ret > 0) {
                                ret = 0;
                                break;
                        }
                        leaf = path->nodes[0];
                }

                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                if (key.objectid < bytenr ||
                    (key.type != BTRFS_EXTENT_ITEM_KEY &&
                     key.type != BTRFS_METADATA_ITEM_KEY)) {
                        path->slots[0]++;
                        continue;
                }

                bytenr = key.objectid;
                if (key.type == BTRFS_METADATA_ITEM_KEY) {
                        /* skinny metadata: key.offset is the tree level,
                         * not a byte count, so the extent is one node */
                        num_bytes = extent_root->nodesize;
                } else {
                        num_bytes = key.offset;
                }

                if (num_bytes == 0) {
                        error("extent length 0 at bytenr %llu key type %d",
                                        (unsigned long long)bytenr, key.type);
                        ret = -EIO;
                        break;
                }

                /* items larger than the bare extent item carry flags */
                if (btrfs_item_size_nr(leaf, path->slots[0]) > sizeof(*ei)) {
                        ei = btrfs_item_ptr(leaf, path->slots[0],
                                            struct btrfs_extent_item);
                        if (btrfs_extent_flags(leaf, ei) &
                            BTRFS_EXTENT_FLAG_TREE_BLOCK) {
                                ret = add_extent(bytenr, num_bytes, metadump,
                                                 0);
                                if (ret) {
                                        error("unable to add block %llu: %d",
                                                (unsigned long long)bytenr, ret);
                                        break;
                                }
                        }
                } else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
                        /* v0 items have no flags; inspect backrefs instead */
                        ret = is_tree_block(extent_root, path, bytenr);
                        if (ret < 0) {
                                error("failed to check tree block %llu: %d",
                                        (unsigned long long)bytenr, ret);
                                break;
                        }

                        if (ret) {
                                ret = add_extent(bytenr, num_bytes, metadump,
                                                 0);
                                if (ret) {
                                        error("unable to add block %llu: %d",
                                                (unsigned long long)bytenr, ret);
                                        break;
                                }
                        }
                        ret = 0;
#else
                        error(
        "either extent tree is corrupted or you haven't built with V0 support");
                        ret = -EIO;
                        break;
#endif
                }
                bytenr += num_bytes;
        }

        btrfs_release_path(path);

        return ret;
}
1321
1322 static int create_metadump(const char *input, FILE *out, int num_threads,
1323                            int compress_level, int sanitize, int walk_trees)
1324 {
1325         struct btrfs_root *root;
1326         struct btrfs_path path;
1327         struct metadump_struct metadump;
1328         int ret;
1329         int err = 0;
1330
1331         root = open_ctree(input, 0, 0);
1332         if (!root) {
1333                 error("open ctree failed");
1334                 return -EIO;
1335         }
1336
1337         ret = metadump_init(&metadump, root, out, num_threads,
1338                             compress_level, sanitize);
1339         if (ret) {
1340                 error("failed to initialize metadump: %d", ret);
1341                 close_ctree(root);
1342                 return ret;
1343         }
1344
1345         ret = add_extent(BTRFS_SUPER_INFO_OFFSET, BTRFS_SUPER_INFO_SIZE,
1346                         &metadump, 0);
1347         if (ret) {
1348                 error("unable to add metadata: %d", ret);
1349                 err = ret;
1350                 goto out;
1351         }
1352
1353         btrfs_init_path(&path);
1354
1355         if (walk_trees) {
1356                 ret = copy_tree_blocks(root, root->fs_info->chunk_root->node,
1357                                        &metadump, 1);
1358                 if (ret) {
1359                         err = ret;
1360                         goto out;
1361                 }
1362
1363                 ret = copy_tree_blocks(root, root->fs_info->tree_root->node,
1364                                        &metadump, 1);
1365                 if (ret) {
1366                         err = ret;
1367                         goto out;
1368                 }
1369         } else {
1370                 ret = copy_from_extent_tree(&metadump, &path);
1371                 if (ret) {
1372                         err = ret;
1373                         goto out;
1374                 }
1375         }
1376
1377         ret = copy_log_trees(root, &metadump, &path);
1378         if (ret) {
1379                 err = ret;
1380                 goto out;
1381         }
1382
1383         ret = copy_space_cache(root, &metadump, &path);
1384 out:
1385         ret = flush_pending(&metadump, 1);
1386         if (ret) {
1387                 if (!err)
1388                         err = ret;
1389                 error("failed to flush pending data: %d", ret);
1390         }
1391
1392         metadump_destroy(&metadump, num_threads);
1393
1394         btrfs_release_path(&path);
1395         ret = close_ctree(root);
1396         return err ? err : ret;
1397 }
1398
1399 static void update_super_old(u8 *buffer)
1400 {
1401         struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
1402         struct btrfs_chunk *chunk;
1403         struct btrfs_disk_key *key;
1404         u32 sectorsize = btrfs_super_sectorsize(super);
1405         u64 flags = btrfs_super_flags(super);
1406
1407         flags |= BTRFS_SUPER_FLAG_METADUMP;
1408         btrfs_set_super_flags(super, flags);
1409
1410         key = (struct btrfs_disk_key *)(super->sys_chunk_array);
1411         chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
1412                                        sizeof(struct btrfs_disk_key));
1413
1414         btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
1415         btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
1416         btrfs_set_disk_key_offset(key, 0);
1417
1418         btrfs_set_stack_chunk_length(chunk, (u64)-1);
1419         btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
1420         btrfs_set_stack_chunk_stripe_len(chunk, BTRFS_STRIPE_LEN);
1421         btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
1422         btrfs_set_stack_chunk_io_align(chunk, sectorsize);
1423         btrfs_set_stack_chunk_io_width(chunk, sectorsize);
1424         btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
1425         btrfs_set_stack_chunk_num_stripes(chunk, 1);
1426         btrfs_set_stack_chunk_sub_stripes(chunk, 0);
1427         chunk->stripe.devid = super->dev_item.devid;
1428         btrfs_set_stack_stripe_offset(&chunk->stripe, 0);
1429         memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
1430         btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
1431         csum_block(buffer, BTRFS_SUPER_INFO_SIZE);
1432 }
1433
/*
 * Rewrite the restored superblock's sys_chunk_array for a single
 * target device: every chunk item is collapsed to one stripe (DUP
 * keeps two), stripe offsets are remapped via logical_to_physical(),
 * and the METADUMP_V2 flag is set before re-checksumming.
 *
 * Returns 0 on success or -EIO on a malformed sys array.
 */
static int update_super(struct mdrestore_struct *mdres, u8 *buffer)
{
        struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
        struct btrfs_chunk *chunk;
        struct btrfs_disk_key *disk_key;
        struct btrfs_key key;
        u64 flags = btrfs_super_flags(super);
        u32 new_array_size = 0;
        u32 array_size;
        u32 cur = 0;
        u8 *ptr, *write_ptr;
        int old_num_stripes;

        /* read (ptr) and write (write_ptr) cursors walk the same array;
         * rewritten entries are never larger than the originals, so the
         * in-place compaction is safe */
        write_ptr = ptr = super->sys_chunk_array;
        array_size = btrfs_super_sys_array_size(super);

        while (cur < array_size) {
                disk_key = (struct btrfs_disk_key *)ptr;
                btrfs_disk_key_to_cpu(&key, disk_key);

                new_array_size += sizeof(*disk_key);
                memmove(write_ptr, ptr, sizeof(*disk_key));

                write_ptr += sizeof(*disk_key);
                ptr += sizeof(*disk_key);
                cur += sizeof(*disk_key);

                if (key.type == BTRFS_CHUNK_ITEM_KEY) {
                        u64 type, physical, physical_dup, size = 0;

                        chunk = (struct btrfs_chunk *)ptr;
                        old_num_stripes = btrfs_stack_chunk_num_stripes(chunk);
                        chunk = (struct btrfs_chunk *)write_ptr;

                        /* NOTE(review): only sizeof(*chunk) bytes (header +
                         * first stripe) are moved; for DUP the second
                         * stripe is assumed to already be in place at the
                         * write position — verify when cursors diverge */
                        memmove(write_ptr, ptr, sizeof(*chunk));
                        btrfs_set_stack_chunk_sub_stripes(chunk, 0);
                        type = btrfs_stack_chunk_type(chunk);
                        if (type & BTRFS_BLOCK_GROUP_DUP) {
                                /* DUP keeps its second stripe */
                                new_array_size += sizeof(struct btrfs_stripe);
                                write_ptr += sizeof(struct btrfs_stripe);
                        } else {
                                /* collapse to a single SYSTEM stripe */
                                btrfs_set_stack_chunk_num_stripes(chunk, 1);
                                btrfs_set_stack_chunk_type(chunk,
                                                BTRFS_BLOCK_GROUP_SYSTEM);
                        }
                        chunk->stripe.devid = super->dev_item.devid;
                        physical = logical_to_physical(mdres, key.offset,
                                                       &size, &physical_dup);
                        if (size != (u64)-1)
                                btrfs_set_stack_stripe_offset(&chunk->stripe,
                                                              physical);
                        memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid,
                               BTRFS_UUID_SIZE);
                        new_array_size += sizeof(*chunk);
                } else {
                        error("bogus key in the sys array %d", key.type);
                        return -EIO;
                }
                write_ptr += sizeof(*chunk);
                ptr += btrfs_chunk_item_size(old_num_stripes);
                cur += btrfs_chunk_item_size(old_num_stripes);
        }

        if (mdres->clear_space_cache)
                btrfs_set_super_cache_generation(super, 0);

        flags |= BTRFS_SUPER_FLAG_METADUMP_V2;
        btrfs_set_super_flags(super, flags);
        btrfs_set_super_sys_array_size(super, new_array_size);
        csum_block(buffer, BTRFS_SUPER_INFO_SIZE);

        return 0;
}
1507
1508 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size)
1509 {
1510         struct extent_buffer *eb;
1511
1512         eb = calloc(1, sizeof(struct extent_buffer) + size);
1513         if (!eb)
1514                 return NULL;
1515
1516         eb->start = bytenr;
1517         eb->len = size;
1518         return eb;
1519 }
1520
/*
 * Shrink the item at @slot in leaf @eb to @new_size bytes.  Item data
 * in a btrfs leaf is laid out back-to-front, so the data of all items
 * after @slot is shifted toward the end of the leaf and their offsets
 * are bumped by the freed amount.  In-memory analogue of
 * btrfs_truncate_item() — no transaction involved.
 */
static void truncate_item(struct extent_buffer *eb, int slot, u32 new_size)
{
        struct btrfs_item *item;
        u32 nritems;
        u32 old_size;
        u32 old_data_start;
        u32 size_diff;
        u32 data_end;
        int i;

        old_size = btrfs_item_size_nr(eb, slot);
        if (old_size == new_size)
                return;

        nritems = btrfs_header_nritems(eb);
        /* offset of the last item's data = lowest used data offset */
        data_end = btrfs_item_offset_nr(eb, nritems - 1);

        old_data_start = btrfs_item_offset_nr(eb, slot);
        size_diff = old_size - new_size;

        /* bump the offsets of @slot and everything after it by the
         * number of bytes being freed */
        for (i = slot; i < nritems; i++) {
                u32 ioff;
                item = btrfs_item_nr(i);
                ioff = btrfs_item_offset(eb, item);
                btrfs_set_item_offset(eb, item, ioff + size_diff);
        }

        /* slide the data between data_end and the truncated item */
        memmove_extent_buffer(eb, btrfs_leaf_data(eb) + data_end + size_diff,
                              btrfs_leaf_data(eb) + data_end,
                              old_data_start + new_size - data_end);
        item = btrfs_item_nr(slot);
        btrfs_set_item_size(eb, item, new_size);
}
1554
1555 static int fixup_chunk_tree_block(struct mdrestore_struct *mdres,
1556                                   struct async_work *async, u8 *buffer,
1557                                   size_t size)
1558 {
1559         struct extent_buffer *eb;
1560         size_t size_left = size;
1561         u64 bytenr = async->start;
1562         int i;
1563
1564         if (size_left % mdres->nodesize)
1565                 return 0;
1566
1567         eb = alloc_dummy_eb(bytenr, mdres->nodesize);
1568         if (!eb)
1569                 return -ENOMEM;
1570
1571         while (size_left) {
1572                 eb->start = bytenr;
1573                 memcpy(eb->data, buffer, mdres->nodesize);
1574
1575                 if (btrfs_header_bytenr(eb) != bytenr)
1576                         break;
1577                 if (memcmp(mdres->fsid,
1578                            eb->data + offsetof(struct btrfs_header, fsid),
1579                            BTRFS_FSID_SIZE))
1580                         break;
1581
1582                 if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID)
1583                         goto next;
1584
1585                 if (btrfs_header_level(eb) != 0)
1586                         goto next;
1587
1588                 for (i = 0; i < btrfs_header_nritems(eb); i++) {
1589                         struct btrfs_chunk *chunk;
1590                         struct btrfs_key key;
1591                         u64 type, physical, physical_dup, size = (u64)-1;
1592
1593                         btrfs_item_key_to_cpu(eb, &key, i);
1594                         if (key.type != BTRFS_CHUNK_ITEM_KEY)
1595                                 continue;
1596
1597                         size = 0;
1598                         physical = logical_to_physical(mdres, key.offset,
1599                                                        &size, &physical_dup);
1600
1601                         if (!physical_dup)
1602                                 truncate_item(eb, i, sizeof(*chunk));
1603                         chunk = btrfs_item_ptr(eb, i, struct btrfs_chunk);
1604
1605
1606                         /* Zero out the RAID profile */
1607                         type = btrfs_chunk_type(eb, chunk);
1608                         type &= (BTRFS_BLOCK_GROUP_DATA |
1609                                  BTRFS_BLOCK_GROUP_SYSTEM |
1610                                  BTRFS_BLOCK_GROUP_METADATA |
1611                                  BTRFS_BLOCK_GROUP_DUP);
1612                         btrfs_set_chunk_type(eb, chunk, type);
1613
1614                         if (!physical_dup)
1615                                 btrfs_set_chunk_num_stripes(eb, chunk, 1);
1616                         btrfs_set_chunk_sub_stripes(eb, chunk, 0);
1617                         btrfs_set_stripe_devid_nr(eb, chunk, 0, mdres->devid);
1618                         if (size != (u64)-1)
1619                                 btrfs_set_stripe_offset_nr(eb, chunk, 0,
1620                                                            physical);
1621                         /* update stripe 2 offset */
1622                         if (physical_dup)
1623                                 btrfs_set_stripe_offset_nr(eb, chunk, 1,
1624                                                            physical_dup);
1625
1626                         write_extent_buffer(eb, mdres->uuid,
1627                                         (unsigned long)btrfs_stripe_dev_uuid_nr(
1628                                                 chunk, 0),
1629                                         BTRFS_UUID_SIZE);
1630                 }
1631                 memcpy(buffer, eb->data, eb->len);
1632                 csum_block(buffer, eb->len);
1633 next:
1634                 size_left -= mdres->nodesize;
1635                 buffer += mdres->nodesize;
1636                 bytenr += mdres->nodesize;
1637         }
1638
1639         free(eb);
1640         return 0;
1641 }
1642
1643 static void write_backup_supers(int fd, u8 *buf)
1644 {
1645         struct btrfs_super_block *super = (struct btrfs_super_block *)buf;
1646         struct stat st;
1647         u64 size;
1648         u64 bytenr;
1649         int i;
1650         int ret;
1651
1652         if (fstat(fd, &st)) {
1653                 error(
1654         "cannot stat restore point, won't be able to write backup supers: %s",
1655                         strerror(errno));
1656                 return;
1657         }
1658
1659         size = btrfs_device_size(fd, &st);
1660
1661         for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1662                 bytenr = btrfs_sb_offset(i);
1663                 if (bytenr + BTRFS_SUPER_INFO_SIZE > size)
1664                         break;
1665                 btrfs_set_super_bytenr(super, bytenr);
1666                 csum_block(buf, BTRFS_SUPER_INFO_SIZE);
1667                 ret = pwrite64(fd, buf, BTRFS_SUPER_INFO_SIZE, bytenr);
1668                 if (ret < BTRFS_SUPER_INFO_SIZE) {
1669                         if (ret < 0)
1670                                 error(
1671                                 "problem writing out backup super block %d: %s",
1672                                                 i, strerror(errno));
1673                         else
1674                                 error("short write writing out backup super block");
1675                         break;
1676                 }
1677         }
1678 }
1679
1680 static void *restore_worker(void *data)
1681 {
1682         struct mdrestore_struct *mdres = (struct mdrestore_struct *)data;
1683         struct async_work *async;
1684         size_t size;
1685         u8 *buffer;
1686         u8 *outbuf;
1687         int outfd;
1688         int ret;
1689         int compress_size = MAX_PENDING_SIZE * 4;
1690
1691         outfd = fileno(mdres->out);
1692         buffer = malloc(compress_size);
1693         if (!buffer) {
1694                 error("not enough memory for restore worker buffer");
1695                 pthread_mutex_lock(&mdres->mutex);
1696                 if (!mdres->error)
1697                         mdres->error = -ENOMEM;
1698                 pthread_mutex_unlock(&mdres->mutex);
1699                 pthread_exit(NULL);
1700         }
1701
1702         while (1) {
1703                 u64 bytenr, physical_dup;
1704                 off_t offset = 0;
1705                 int err = 0;
1706
1707                 pthread_mutex_lock(&mdres->mutex);
1708                 while (!mdres->nodesize || list_empty(&mdres->list)) {
1709                         if (mdres->done) {
1710                                 pthread_mutex_unlock(&mdres->mutex);
1711                                 goto out;
1712                         }
1713                         pthread_cond_wait(&mdres->cond, &mdres->mutex);
1714                 }
1715                 async = list_entry(mdres->list.next, struct async_work, list);
1716                 list_del_init(&async->list);
1717                 pthread_mutex_unlock(&mdres->mutex);
1718
1719                 if (mdres->compress_method == COMPRESS_ZLIB) {
1720                         size = compress_size; 
1721                         ret = uncompress(buffer, (unsigned long *)&size,
1722                                          async->buffer, async->bufsize);
1723                         if (ret != Z_OK) {
1724                                 error("decompressiion failed with %d", ret);
1725                                 err = -EIO;
1726                         }
1727                         outbuf = buffer;
1728                 } else {
1729                         outbuf = async->buffer;
1730                         size = async->bufsize;
1731                 }
1732
1733                 if (!mdres->multi_devices) {
1734                         if (async->start == BTRFS_SUPER_INFO_OFFSET) {
1735                                 if (mdres->old_restore) {
1736                                         update_super_old(outbuf);
1737                                 } else {
1738                                         ret = update_super(mdres, outbuf);
1739                                         if (ret)
1740                                                 err = ret;
1741                                 }
1742                         } else if (!mdres->old_restore) {
1743                                 ret = fixup_chunk_tree_block(mdres, async, outbuf, size);
1744                                 if (ret)
1745                                         err = ret;
1746                         }
1747                 }
1748
1749                 if (!mdres->fixup_offset) {
1750                         while (size) {
1751                                 u64 chunk_size = size;
1752                                 physical_dup = 0;
1753                                 if (!mdres->multi_devices && !mdres->old_restore)
1754                                         bytenr = logical_to_physical(mdres,
1755                                                      async->start + offset,
1756                                                      &chunk_size,
1757                                                      &physical_dup);
1758                                 else
1759                                         bytenr = async->start + offset;
1760
1761                                 ret = pwrite64(outfd, outbuf+offset, chunk_size,
1762                                                bytenr);
1763                                 if (ret != chunk_size)
1764                                         goto error;
1765
1766                                 if (physical_dup)
1767                                         ret = pwrite64(outfd, outbuf+offset,
1768                                                        chunk_size,
1769                                                        physical_dup);
1770                                 if (ret != chunk_size)
1771                                         goto error;
1772
1773                                 size -= chunk_size;
1774                                 offset += chunk_size;
1775                                 continue;
1776
1777 error:
1778                                 if (ret < 0) {
1779                                         error("unable to write to device: %s",
1780                                                         strerror(errno));
1781                                         err = errno;
1782                                 } else {
1783                                         error("short write");
1784                                         err = -EIO;
1785                                 }
1786                         }
1787                 } else if (async->start != BTRFS_SUPER_INFO_OFFSET) {
1788                         ret = write_data_to_disk(mdres->info, outbuf, async->start, size, 0);
1789                         if (ret) {
1790                                 error("failed to write data");
1791                                 exit(1);
1792                         }
1793                 }
1794
1795
1796                 /* backup super blocks are already there at fixup_offset stage */
1797                 if (!mdres->multi_devices && async->start == BTRFS_SUPER_INFO_OFFSET)
1798                         write_backup_supers(outfd, outbuf);
1799
1800                 pthread_mutex_lock(&mdres->mutex);
1801                 if (err && !mdres->error)
1802                         mdres->error = err;
1803                 mdres->num_items--;
1804                 pthread_mutex_unlock(&mdres->mutex);
1805
1806                 free(async->buffer);
1807                 free(async);
1808         }
1809 out:
1810         free(buffer);
1811         pthread_exit(NULL);
1812 }
1813
1814 static void mdrestore_destroy(struct mdrestore_struct *mdres, int num_threads)
1815 {
1816         struct rb_node *n;
1817         int i;
1818
1819         while ((n = rb_first(&mdres->chunk_tree))) {
1820                 struct fs_chunk *entry;
1821
1822                 entry = rb_entry(n, struct fs_chunk, l);
1823                 rb_erase(n, &mdres->chunk_tree);
1824                 rb_erase(&entry->p, &mdres->physical_tree);
1825                 free(entry);
1826         }
1827         pthread_mutex_lock(&mdres->mutex);
1828         mdres->done = 1;
1829         pthread_cond_broadcast(&mdres->cond);
1830         pthread_mutex_unlock(&mdres->mutex);
1831
1832         for (i = 0; i < num_threads; i++)
1833                 pthread_join(mdres->threads[i], NULL);
1834
1835         pthread_cond_destroy(&mdres->cond);
1836         pthread_mutex_destroy(&mdres->mutex);
1837 }
1838
1839 static int mdrestore_init(struct mdrestore_struct *mdres,
1840                           FILE *in, FILE *out, int old_restore,
1841                           int num_threads, int fixup_offset,
1842                           struct btrfs_fs_info *info, int multi_devices)
1843 {
1844         int i, ret = 0;
1845
1846         memset(mdres, 0, sizeof(*mdres));
1847         pthread_cond_init(&mdres->cond, NULL);
1848         pthread_mutex_init(&mdres->mutex, NULL);
1849         INIT_LIST_HEAD(&mdres->list);
1850         INIT_LIST_HEAD(&mdres->overlapping_chunks);
1851         mdres->in = in;
1852         mdres->out = out;
1853         mdres->old_restore = old_restore;
1854         mdres->chunk_tree.rb_node = NULL;
1855         mdres->fixup_offset = fixup_offset;
1856         mdres->info = info;
1857         mdres->multi_devices = multi_devices;
1858         mdres->clear_space_cache = 0;
1859         mdres->last_physical_offset = 0;
1860         mdres->alloced_chunks = 0;
1861
1862         if (!num_threads)
1863                 return 0;
1864
1865         mdres->num_threads = num_threads;
1866         for (i = 0; i < num_threads; i++) {
1867                 ret = pthread_create(&mdres->threads[i], NULL, restore_worker,
1868                                      mdres);
1869                 if (ret) {
1870                         /* pthread_create returns errno directly */
1871                         ret = -ret;
1872                         break;
1873                 }
1874         }
1875         if (ret)
1876                 mdrestore_destroy(mdres, i + 1);
1877         return ret;
1878 }
1879
1880 static int fill_mdres_info(struct mdrestore_struct *mdres,
1881                            struct async_work *async)
1882 {
1883         struct btrfs_super_block *super;
1884         u8 *buffer = NULL;
1885         u8 *outbuf;
1886         int ret;
1887
1888         /* We've already been initialized */
1889         if (mdres->nodesize)
1890                 return 0;
1891
1892         if (mdres->compress_method == COMPRESS_ZLIB) {
1893                 size_t size = MAX_PENDING_SIZE * 2;
1894
1895                 buffer = malloc(MAX_PENDING_SIZE * 2);
1896                 if (!buffer)
1897                         return -ENOMEM;
1898                 ret = uncompress(buffer, (unsigned long *)&size,
1899                                  async->buffer, async->bufsize);
1900                 if (ret != Z_OK) {
1901                         error("decompressiion failed with %d", ret);
1902                         free(buffer);
1903                         return -EIO;
1904                 }
1905                 outbuf = buffer;
1906         } else {
1907                 outbuf = async->buffer;
1908         }
1909
1910         super = (struct btrfs_super_block *)outbuf;
1911         mdres->nodesize = btrfs_super_nodesize(super);
1912         memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
1913         memcpy(mdres->uuid, super->dev_item.uuid,
1914                        BTRFS_UUID_SIZE);
1915         mdres->devid = le64_to_cpu(super->dev_item.devid);
1916         free(buffer);
1917         return 0;
1918 }
1919
1920 static int add_cluster(struct meta_cluster *cluster,
1921                        struct mdrestore_struct *mdres, u64 *next)
1922 {
1923         struct meta_cluster_item *item;
1924         struct meta_cluster_header *header = &cluster->header;
1925         struct async_work *async;
1926         u64 bytenr;
1927         u32 i, nritems;
1928         int ret;
1929
1930         mdres->compress_method = header->compress;
1931
1932         bytenr = le64_to_cpu(header->bytenr) + BLOCK_SIZE;
1933         nritems = le32_to_cpu(header->nritems);
1934         for (i = 0; i < nritems; i++) {
1935                 item = &cluster->items[i];
1936                 async = calloc(1, sizeof(*async));
1937                 if (!async) {
1938                         error("not enough memory for async data");
1939                         return -ENOMEM;
1940                 }
1941                 async->start = le64_to_cpu(item->bytenr);
1942                 async->bufsize = le32_to_cpu(item->size);
1943                 async->buffer = malloc(async->bufsize);
1944                 if (!async->buffer) {
1945                         error("not enough memory for async buffer");
1946                         free(async);
1947                         return -ENOMEM;
1948                 }
1949                 ret = fread(async->buffer, async->bufsize, 1, mdres->in);
1950                 if (ret != 1) {
1951                         error("unable to read buffer: %s", strerror(errno));
1952                         free(async->buffer);
1953                         free(async);
1954                         return -EIO;
1955                 }
1956                 bytenr += async->bufsize;
1957
1958                 pthread_mutex_lock(&mdres->mutex);
1959                 if (async->start == BTRFS_SUPER_INFO_OFFSET) {
1960                         ret = fill_mdres_info(mdres, async);
1961                         if (ret) {
1962                                 error("unable to set up restore state");
1963                                 pthread_mutex_unlock(&mdres->mutex);
1964                                 free(async->buffer);
1965                                 free(async);
1966                                 return ret;
1967                         }
1968                 }
1969                 list_add_tail(&async->list, &mdres->list);
1970                 mdres->num_items++;
1971                 pthread_cond_signal(&mdres->cond);
1972                 pthread_mutex_unlock(&mdres->mutex);
1973         }
1974         if (bytenr & BLOCK_MASK) {
1975                 char buffer[BLOCK_MASK];
1976                 size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);
1977
1978                 bytenr += size;
1979                 ret = fread(buffer, size, 1, mdres->in);
1980                 if (ret != 1) {
1981                         error("failed to read buffer: %s", strerror(errno));
1982                         return -EIO;
1983                 }
1984         }
1985         *next = bytenr;
1986         return 0;
1987 }
1988
1989 static int wait_for_worker(struct mdrestore_struct *mdres)
1990 {
1991         int ret = 0;
1992
1993         pthread_mutex_lock(&mdres->mutex);
1994         ret = mdres->error;
1995         while (!ret && mdres->num_items > 0) {
1996                 struct timespec ts = {
1997                         .tv_sec = 0,
1998                         .tv_nsec = 10000000,
1999                 };
2000                 pthread_mutex_unlock(&mdres->mutex);
2001                 nanosleep(&ts, NULL);
2002                 pthread_mutex_lock(&mdres->mutex);
2003                 ret = mdres->error;
2004         }
2005         pthread_mutex_unlock(&mdres->mutex);
2006         return ret;
2007 }
2008
/*
 * Extract the chunk-tree block at logical @bytenr from the item data in
 * @buffer and record every chunk item into mdres->chunk_tree and
 * mdres->physical_tree; interior nodes recurse via
 * search_for_chunk_blocks().
 *
 * @buffer:         decompressed item data known to contain @bytenr
 * @item_bytenr:    logical address of the first node in @buffer
 * @bufsize:        size of @buffer (currently unused in the body)
 * @cluster_bytenr: image offset of the originating cluster, used as the
 *                  scan start when recursing into child blocks
 *
 * Returns 0 on success, -ENOMEM or -EIO on failure.
 */
static int read_chunk_block(struct mdrestore_struct *mdres, u8 *buffer,
			    u64 bytenr, u64 item_bytenr, u32 bufsize,
			    u64 cluster_bytenr)
{
	struct extent_buffer *eb;
	int ret = 0;
	int i;

	eb = alloc_dummy_eb(bytenr, mdres->nodesize);
	if (!eb) {
		ret = -ENOMEM;
		goto out;
	}

	/* advance to the node inside @buffer that matches @bytenr */
	while (item_bytenr != bytenr) {
		buffer += mdres->nodesize;
		item_bytenr += mdres->nodesize;
	}

	memcpy(eb->data, buffer, mdres->nodesize);
	if (btrfs_header_bytenr(eb) != bytenr) {
		error("eb bytenr does not match found bytenr: %llu != %llu",
				(unsigned long long)btrfs_header_bytenr(eb),
				(unsigned long long)bytenr);
		ret = -EIO;
		goto out;
	}

	/* the block must belong to the dumped filesystem */
	if (memcmp(mdres->fsid, eb->data + offsetof(struct btrfs_header, fsid),
		   BTRFS_FSID_SIZE)) {
		error("filesystem UUID of eb %llu does not match",
				(unsigned long long)bytenr);
		ret = -EIO;
		goto out;
	}

	/* and it must be a chunk tree block */
	if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID) {
		error("wrong eb %llu owner %llu",
				(unsigned long long)bytenr,
				(unsigned long long)btrfs_header_owner(eb));
		ret = -EIO;
		goto out;
	}

	for (i = 0; i < btrfs_header_nritems(eb); i++) {
		struct btrfs_chunk *chunk;
		struct fs_chunk *fs_chunk;
		struct btrfs_key key;
		u64 type;

		/* interior node: descend into each child block */
		if (btrfs_header_level(eb)) {
			u64 blockptr = btrfs_node_blockptr(eb, i);

			ret = search_for_chunk_blocks(mdres, blockptr,
						      cluster_bytenr);
			if (ret)
				break;
			continue;
		}

		/* Yay a leaf!  We loves leafs! */
		btrfs_item_key_to_cpu(eb, &key, i);
		if (key.type != BTRFS_CHUNK_ITEM_KEY)
			continue;

		fs_chunk = malloc(sizeof(struct fs_chunk));
		if (!fs_chunk) {
			error("not enough memory to allocate chunk");
			ret = -ENOMEM;
			break;
		}
		memset(fs_chunk, 0, sizeof(*fs_chunk));
		chunk = btrfs_item_ptr(eb, i, struct btrfs_chunk);

		fs_chunk->logical = key.offset;
		fs_chunk->physical = btrfs_stripe_offset_nr(eb, chunk, 0);
		fs_chunk->bytes = btrfs_chunk_length(eb, chunk);
		INIT_LIST_HEAD(&fs_chunk->list);
		/*
		 * Chunks whose physical range collides with one already
		 * recorded are remapped later (remap_overlapping_chunks);
		 * park them on a list instead of inserting.
		 */
		if (tree_search(&mdres->physical_tree, &fs_chunk->p,
				physical_cmp, 1) != NULL)
			list_add(&fs_chunk->list, &mdres->overlapping_chunks);
		else
			tree_insert(&mdres->physical_tree, &fs_chunk->p,
				    physical_cmp);

		type = btrfs_chunk_type(eb, chunk);
		if (type & BTRFS_BLOCK_GROUP_DUP) {
			fs_chunk->physical_dup =
					btrfs_stripe_offset_nr(eb, chunk, 1);
		}

		/* track the highest physical offset any chunk reaches */
		if (fs_chunk->physical_dup + fs_chunk->bytes >
		    mdres->last_physical_offset)
			mdres->last_physical_offset = fs_chunk->physical_dup +
				fs_chunk->bytes;
		else if (fs_chunk->physical + fs_chunk->bytes >
		    mdres->last_physical_offset)
			mdres->last_physical_offset = fs_chunk->physical +
				fs_chunk->bytes;
		mdres->alloced_chunks += fs_chunk->bytes;
		/* in dup case, fs_chunk->bytes should add twice */
		if (fs_chunk->physical_dup)
			mdres->alloced_chunks += fs_chunk->bytes;
		tree_insert(&mdres->chunk_tree, &fs_chunk->l, chunk_cmp);
	}
out:
	free(eb);
	return ret;
}
2118
2119 /* If you have to ask you aren't worthy */
2120 static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
2121                                    u64 search, u64 cluster_bytenr)
2122 {
2123         struct meta_cluster *cluster;
2124         struct meta_cluster_header *header;
2125         struct meta_cluster_item *item;
2126         u64 current_cluster = cluster_bytenr, bytenr;
2127         u64 item_bytenr;
2128         u32 bufsize, nritems, i;
2129         u32 max_size = MAX_PENDING_SIZE * 2;
2130         u8 *buffer, *tmp = NULL;
2131         int ret = 0;
2132
2133         cluster = malloc(BLOCK_SIZE);
2134         if (!cluster) {
2135                 error("not enough memory for cluster");
2136                 return -ENOMEM;
2137         }
2138
2139         buffer = malloc(max_size);
2140         if (!buffer) {
2141                 error("not enough memory for buffer");
2142                 free(cluster);
2143                 return -ENOMEM;
2144         }
2145
2146         if (mdres->compress_method == COMPRESS_ZLIB) {
2147                 tmp = malloc(max_size);
2148                 if (!tmp) {
2149                         error("not enough memory for buffer");
2150                         free(cluster);
2151                         free(buffer);
2152                         return -ENOMEM;
2153                 }
2154         }
2155
2156         bytenr = current_cluster;
2157         while (1) {
2158                 if (fseek(mdres->in, current_cluster, SEEK_SET)) {
2159                         error("seek failed: %s\n", strerror(errno));
2160                         ret = -EIO;
2161                         break;
2162                 }
2163
2164                 ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
2165                 if (ret == 0) {
2166                         if (cluster_bytenr != 0) {
2167                                 cluster_bytenr = 0;
2168                                 current_cluster = 0;
2169                                 bytenr = 0;
2170                                 continue;
2171                         }
2172                         error(
2173         "unknown state after reading cluster at %llu, probably crrupted data",
2174                                         cluster_bytenr);
2175                         ret = -EIO;
2176                         break;
2177                 } else if (ret < 0) {
2178                         error("unable to read image at %llu: %s",
2179                                         (unsigned long long)cluster_bytenr,
2180                                         strerror(errno));
2181                         break;
2182                 }
2183                 ret = 0;
2184
2185                 header = &cluster->header;
2186                 if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
2187                     le64_to_cpu(header->bytenr) != current_cluster) {
2188                         error("bad header in metadump image");
2189                         ret = -EIO;
2190                         break;
2191                 }
2192
2193                 bytenr += BLOCK_SIZE;
2194                 nritems = le32_to_cpu(header->nritems);
2195                 for (i = 0; i < nritems; i++) {
2196                         size_t size;
2197
2198                         item = &cluster->items[i];
2199                         bufsize = le32_to_cpu(item->size);
2200                         item_bytenr = le64_to_cpu(item->bytenr);
2201
2202                         if (bufsize > max_size) {
2203                                 error("item %u too big: %u > %u", i, bufsize,
2204                                                 max_size);
2205                                 ret = -EIO;
2206                                 break;
2207                         }
2208
2209                         if (mdres->compress_method == COMPRESS_ZLIB) {
2210                                 ret = fread(tmp, bufsize, 1, mdres->in);
2211                                 if (ret != 1) {
2212                                         error("read error: %s", strerror(errno));
2213                                         ret = -EIO;
2214                                         break;
2215                                 }
2216
2217                                 size = max_size;
2218                                 ret = uncompress(buffer,
2219                                                  (unsigned long *)&size, tmp,
2220                                                  bufsize);
2221                                 if (ret != Z_OK) {
2222                                         error("decompressiion failed with %d",
2223                                                         ret);
2224                                         ret = -EIO;
2225                                         break;
2226                                 }
2227                         } else {
2228                                 ret = fread(buffer, bufsize, 1, mdres->in);
2229                                 if (ret != 1) {
2230                                         error("read error: %s",
2231                                                         strerror(errno));
2232                                         ret = -EIO;
2233                                         break;
2234                                 }
2235                                 size = bufsize;
2236                         }
2237                         ret = 0;
2238
2239                         if (item_bytenr <= search &&
2240                             item_bytenr + size > search) {
2241                                 ret = read_chunk_block(mdres, buffer, search,
2242                                                        item_bytenr, size,
2243                                                        current_cluster);
2244                                 if (!ret)
2245                                         ret = 1;
2246                                 break;
2247                         }
2248                         bytenr += bufsize;
2249                 }
2250                 if (ret) {
2251                         if (ret > 0)
2252                                 ret = 0;
2253                         break;
2254                 }
2255                 if (bytenr & BLOCK_MASK)
2256                         bytenr += BLOCK_SIZE - (bytenr & BLOCK_MASK);
2257                 current_cluster = bytenr;
2258         }
2259
2260         free(tmp);
2261         free(buffer);
2262         free(cluster);
2263         return ret;
2264 }
2265
2266 static int build_chunk_tree(struct mdrestore_struct *mdres,
2267                             struct meta_cluster *cluster)
2268 {
2269         struct btrfs_super_block *super;
2270         struct meta_cluster_header *header;
2271         struct meta_cluster_item *item = NULL;
2272         u64 chunk_root_bytenr = 0;
2273         u32 i, nritems;
2274         u64 bytenr = 0;
2275         u8 *buffer;
2276         int ret;
2277
2278         /* We can't seek with stdin so don't bother doing this */
2279         if (mdres->in == stdin)
2280                 return 0;
2281
2282         ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
2283         if (ret <= 0) {
2284                 error("unable to read cluster: %s", strerror(errno));
2285                 return -EIO;
2286         }
2287         ret = 0;
2288
2289         header = &cluster->header;
2290         if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
2291             le64_to_cpu(header->bytenr) != 0) {
2292                 error("bad header in metadump image");
2293                 return -EIO;
2294         }
2295
2296         bytenr += BLOCK_SIZE;
2297         mdres->compress_method = header->compress;
2298         nritems = le32_to_cpu(header->nritems);
2299         for (i = 0; i < nritems; i++) {
2300                 item = &cluster->items[i];
2301
2302                 if (le64_to_cpu(item->bytenr) == BTRFS_SUPER_INFO_OFFSET)
2303                         break;
2304                 bytenr += le32_to_cpu(item->size);
2305                 if (fseek(mdres->in, le32_to_cpu(item->size), SEEK_CUR)) {
2306                         error("seek failed: %s\n", strerror(errno));
2307                         return -EIO;
2308                 }
2309         }
2310
2311         if (!item || le64_to_cpu(item->bytenr) != BTRFS_SUPER_INFO_OFFSET) {
2312                 error("did not find superblock at %llu",
2313                                 le64_to_cpu(item->bytenr));
2314                 return -EINVAL;
2315         }
2316
2317         buffer = malloc(le32_to_cpu(item->size));
2318         if (!buffer) {
2319                 error("not enough memory to allocate buffer");
2320                 return -ENOMEM;
2321         }
2322
2323         ret = fread(buffer, le32_to_cpu(item->size), 1, mdres->in);
2324         if (ret != 1) {
2325                 error("unable to read buffer: %s", strerror(errno));
2326                 free(buffer);
2327                 return -EIO;
2328         }
2329
2330         if (mdres->compress_method == COMPRESS_ZLIB) {
2331                 size_t size = MAX_PENDING_SIZE * 2;
2332                 u8 *tmp;
2333
2334                 tmp = malloc(MAX_PENDING_SIZE * 2);
2335                 if (!tmp) {
2336                         free(buffer);
2337                         return -ENOMEM;
2338                 }
2339                 ret = uncompress(tmp, (unsigned long *)&size,
2340                                  buffer, le32_to_cpu(item->size));
2341                 if (ret != Z_OK) {
2342                         error("decompressiion failed with %d", ret);
2343                         free(buffer);
2344                         free(tmp);
2345                         return -EIO;
2346                 }
2347                 free(buffer);
2348                 buffer = tmp;
2349         }
2350
2351         pthread_mutex_lock(&mdres->mutex);
2352         super = (struct btrfs_super_block *)buffer;
2353         chunk_root_bytenr = btrfs_super_chunk_root(super);
2354         mdres->nodesize = btrfs_super_nodesize(super);
2355         memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
2356         memcpy(mdres->uuid, super->dev_item.uuid,
2357                        BTRFS_UUID_SIZE);
2358         mdres->devid = le64_to_cpu(super->dev_item.devid);
2359         free(buffer);
2360         pthread_mutex_unlock(&mdres->mutex);
2361
2362         return search_for_chunk_blocks(mdres, chunk_root_bytenr, 0);
2363 }
2364
2365 static int range_contains_super(u64 physical, u64 bytes)
2366 {
2367         u64 super_bytenr;
2368         int i;
2369
2370         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2371                 super_bytenr = btrfs_sb_offset(i);
2372                 if (super_bytenr >= physical &&
2373                     super_bytenr < physical + bytes)
2374                         return 1;
2375         }
2376
2377         return 0;
2378 }
2379
2380 static void remap_overlapping_chunks(struct mdrestore_struct *mdres)
2381 {
2382         struct fs_chunk *fs_chunk;
2383
2384         while (!list_empty(&mdres->overlapping_chunks)) {
2385                 fs_chunk = list_first_entry(&mdres->overlapping_chunks,
2386                                             struct fs_chunk, list);
2387                 list_del_init(&fs_chunk->list);
2388                 if (range_contains_super(fs_chunk->physical,
2389                                          fs_chunk->bytes)) {
2390                         warning(
2391 "remapping a chunk that had a super mirror inside of it, clearing space cache so we don't end up with corruption");
2392                         mdres->clear_space_cache = 1;
2393                 }
2394                 fs_chunk->physical = mdres->last_physical_offset;
2395                 tree_insert(&mdres->physical_tree, &fs_chunk->p, physical_cmp);
2396                 mdres->last_physical_offset += fs_chunk->bytes;
2397         }
2398 }
2399
2400 static int fixup_devices(struct btrfs_fs_info *fs_info,
2401                          struct mdrestore_struct *mdres, off_t dev_size)
2402 {
2403         struct btrfs_trans_handle *trans;
2404         struct btrfs_dev_item *dev_item;
2405         struct btrfs_path path;
2406         struct extent_buffer *leaf;
2407         struct btrfs_root *root = fs_info->chunk_root;
2408         struct btrfs_key key;
2409         u64 devid, cur_devid;
2410         int ret;
2411
2412         trans = btrfs_start_transaction(fs_info->tree_root, 1);
2413         if (IS_ERR(trans)) {
2414                 error("cannot starting transaction %ld", PTR_ERR(trans));
2415                 return PTR_ERR(trans);
2416         }
2417
2418         dev_item = &fs_info->super_copy->dev_item;
2419
2420         devid = btrfs_stack_device_id(dev_item);
2421
2422         btrfs_set_stack_device_total_bytes(dev_item, dev_size);
2423         btrfs_set_stack_device_bytes_used(dev_item, mdres->alloced_chunks);
2424
2425         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2426         key.type = BTRFS_DEV_ITEM_KEY;
2427         key.offset = 0;
2428
2429         btrfs_init_path(&path);
2430
2431 again:
2432         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
2433         if (ret < 0) {
2434                 error("search failed: %d", ret);
2435                 exit(1);
2436         }
2437
2438         while (1) {
2439                 leaf = path.nodes[0];
2440                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2441                         ret = btrfs_next_leaf(root, &path);
2442                         if (ret < 0) {
2443                                 error("cannot go to next leaf %d", ret);
2444                                 exit(1);
2445                         }
2446                         if (ret > 0) {
2447                                 ret = 0;
2448                                 break;
2449                         }
2450                         leaf = path.nodes[0];
2451                 }
2452
2453                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2454                 if (key.type > BTRFS_DEV_ITEM_KEY)
2455                         break;
2456                 if (key.type != BTRFS_DEV_ITEM_KEY) {
2457                         path.slots[0]++;
2458                         continue;
2459                 }
2460
2461                 dev_item = btrfs_item_ptr(leaf, path.slots[0],
2462                                           struct btrfs_dev_item);
2463                 cur_devid = btrfs_device_id(leaf, dev_item);
2464                 if (devid != cur_devid) {
2465                         ret = btrfs_del_item(trans, root, &path);
2466                         if (ret) {
2467                                 error("cannot delete item: %d", ret);
2468                                 exit(1);
2469                         }
2470                         btrfs_release_path(&path);
2471                         goto again;
2472                 }
2473
2474                 btrfs_set_device_total_bytes(leaf, dev_item, dev_size);
2475                 btrfs_set_device_bytes_used(leaf, dev_item,
2476                                             mdres->alloced_chunks);
2477                 btrfs_mark_buffer_dirty(leaf);
2478                 path.slots[0]++;
2479         }
2480
2481         btrfs_release_path(&path);
2482         ret = btrfs_commit_transaction(trans, fs_info->tree_root);
2483         if (ret) {
2484                 error("unable to commit transaction: %d", ret);
2485                 return ret;
2486         }
2487         return 0;
2488 }
2489
2490 static int restore_metadump(const char *input, FILE *out, int old_restore,
2491                             int num_threads, int fixup_offset,
2492                             const char *target, int multi_devices)
2493 {
2494         struct meta_cluster *cluster = NULL;
2495         struct meta_cluster_header *header;
2496         struct mdrestore_struct mdrestore;
2497         struct btrfs_fs_info *info = NULL;
2498         u64 bytenr = 0;
2499         FILE *in = NULL;
2500         int ret = 0;
2501
2502         if (!strcmp(input, "-")) {
2503                 in = stdin;
2504         } else {
2505                 in = fopen(input, "r");
2506                 if (!in) {
2507                         error("unable to open metadump image: %s",
2508                                         strerror(errno));
2509                         return 1;
2510                 }
2511         }
2512
2513         /* NOTE: open with write mode */
2514         if (fixup_offset) {
2515                 info = open_ctree_fs_info(target, 0, 0, 0,
2516                                           OPEN_CTREE_WRITES |
2517                                           OPEN_CTREE_RESTORE |
2518                                           OPEN_CTREE_PARTIAL);
2519                 if (!info) {
2520                         error("open ctree failed");
2521                         ret = -EIO;
2522                         goto failed_open;
2523                 }
2524         }
2525
2526         cluster = malloc(BLOCK_SIZE);
2527         if (!cluster) {
2528                 error("not enough memory for cluster");
2529                 ret = -ENOMEM;
2530                 goto failed_info;
2531         }
2532
2533         ret = mdrestore_init(&mdrestore, in, out, old_restore, num_threads,
2534                              fixup_offset, info, multi_devices);
2535         if (ret) {
2536                 error("failed to intialize metadata restore state: %d", ret);
2537                 goto failed_cluster;
2538         }
2539
2540         if (!multi_devices && !old_restore) {
2541                 ret = build_chunk_tree(&mdrestore, cluster);
2542                 if (ret)
2543                         goto out;
2544                 if (!list_empty(&mdrestore.overlapping_chunks))
2545                         remap_overlapping_chunks(&mdrestore);
2546         }
2547
2548         if (in != stdin && fseek(in, 0, SEEK_SET)) {
2549                 error("seek failed: %s\n", strerror(errno));
2550                 goto out;
2551         }
2552
2553         while (!mdrestore.error) {
2554                 ret = fread(cluster, BLOCK_SIZE, 1, in);
2555                 if (!ret)
2556                         break;
2557
2558                 header = &cluster->header;
2559                 if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
2560                     le64_to_cpu(header->bytenr) != bytenr) {
2561                         error("bad header in metadump image");
2562                         ret = -EIO;
2563                         break;
2564                 }
2565                 ret = add_cluster(cluster, &mdrestore, &bytenr);
2566                 if (ret) {
2567                         error("failed to add cluster: %d", ret);
2568                         break;
2569                 }
2570         }
2571         ret = wait_for_worker(&mdrestore);
2572
2573         if (!ret && !multi_devices && !old_restore) {
2574                 struct btrfs_root *root;
2575                 struct stat st;
2576
2577                 root = open_ctree_fd(fileno(out), target, 0,
2578                                           OPEN_CTREE_PARTIAL |
2579                                           OPEN_CTREE_WRITES |
2580                                           OPEN_CTREE_NO_DEVICES);
2581                 if (!root) {
2582                         error("open ctree failed in %s", target);
2583                         ret = -EIO;
2584                         goto out;
2585                 }
2586                 info = root->fs_info;
2587
2588                 if (stat(target, &st)) {
2589                         error("stat %s failed: %s", target, strerror(errno));
2590                         close_ctree(info->chunk_root);
2591                         free(cluster);
2592                         return 1;
2593                 }
2594
2595                 ret = fixup_devices(info, &mdrestore, st.st_size);
2596                 close_ctree(info->chunk_root);
2597                 if (ret)
2598                         goto out;
2599         }
2600 out:
2601         mdrestore_destroy(&mdrestore, num_threads);
2602 failed_cluster:
2603         free(cluster);
2604 failed_info:
2605         if (fixup_offset && info)
2606                 close_ctree(info->chunk_root);
2607 failed_open:
2608         if (in != stdin)
2609                 fclose(in);
2610         return ret;
2611 }
2612
2613 static int update_disk_super_on_device(struct btrfs_fs_info *info,
2614                                        const char *other_dev, u64 cur_devid)
2615 {
2616         struct btrfs_key key;
2617         struct extent_buffer *leaf;
2618         struct btrfs_path path;
2619         struct btrfs_dev_item *dev_item;
2620         struct btrfs_super_block *disk_super;
2621         char dev_uuid[BTRFS_UUID_SIZE];
2622         char fs_uuid[BTRFS_UUID_SIZE];
2623         u64 devid, type, io_align, io_width;
2624         u64 sector_size, total_bytes, bytes_used;
2625         char buf[BTRFS_SUPER_INFO_SIZE];
2626         int fp = -1;
2627         int ret;
2628
2629         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2630         key.type = BTRFS_DEV_ITEM_KEY;
2631         key.offset = cur_devid;
2632
2633         btrfs_init_path(&path);
2634         ret = btrfs_search_slot(NULL, info->chunk_root, &key, &path, 0, 0); 
2635         if (ret) {
2636                 error("search key failed: %d", ret);
2637                 ret = -EIO;
2638                 goto out;
2639         }
2640
2641         leaf = path.nodes[0];
2642         dev_item = btrfs_item_ptr(leaf, path.slots[0],
2643                                   struct btrfs_dev_item);
2644
2645         devid = btrfs_device_id(leaf, dev_item);
2646         if (devid != cur_devid) {
2647                 error("devid mismatch: %llu != %llu",
2648                                 (unsigned long long)devid,
2649                                 (unsigned long long)cur_devid);
2650                 ret = -EIO;
2651                 goto out;
2652         }
2653
2654         type = btrfs_device_type(leaf, dev_item);
2655         io_align = btrfs_device_io_align(leaf, dev_item);
2656         io_width = btrfs_device_io_width(leaf, dev_item);
2657         sector_size = btrfs_device_sector_size(leaf, dev_item);
2658         total_bytes = btrfs_device_total_bytes(leaf, dev_item);
2659         bytes_used = btrfs_device_bytes_used(leaf, dev_item);
2660         read_extent_buffer(leaf, dev_uuid, (unsigned long)btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE);
2661         read_extent_buffer(leaf, fs_uuid, (unsigned long)btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE);
2662
2663         btrfs_release_path(&path);
2664
2665         printf("update disk super on %s devid=%llu\n", other_dev, devid);
2666
2667         /* update other devices' super block */
2668         fp = open(other_dev, O_CREAT | O_RDWR, 0600);
2669         if (fp < 0) {
2670                 error("could not open %s: %s", other_dev, strerror(errno));
2671                 ret = -EIO;
2672                 goto out;
2673         }
2674
2675         memcpy(buf, info->super_copy, BTRFS_SUPER_INFO_SIZE);
2676
2677         disk_super = (struct btrfs_super_block *)buf;
2678         dev_item = &disk_super->dev_item;
2679
2680         btrfs_set_stack_device_type(dev_item, type);
2681         btrfs_set_stack_device_id(dev_item, devid);
2682         btrfs_set_stack_device_total_bytes(dev_item, total_bytes);
2683         btrfs_set_stack_device_bytes_used(dev_item, bytes_used);
2684         btrfs_set_stack_device_io_align(dev_item, io_align);
2685         btrfs_set_stack_device_io_width(dev_item, io_width);
2686         btrfs_set_stack_device_sector_size(dev_item, sector_size);
2687         memcpy(dev_item->uuid, dev_uuid, BTRFS_UUID_SIZE);
2688         memcpy(dev_item->fsid, fs_uuid, BTRFS_UUID_SIZE);
2689         csum_block((u8 *)buf, BTRFS_SUPER_INFO_SIZE);
2690
2691         ret = pwrite64(fp, buf, BTRFS_SUPER_INFO_SIZE, BTRFS_SUPER_INFO_OFFSET);
2692         if (ret != BTRFS_SUPER_INFO_SIZE) {
2693                 if (ret < 0)
2694                         error("cannot write superblock: %s", strerror(ret));
2695                 else
2696                         error("cannot write superblock");
2697                 ret = -EIO;
2698                 goto out;
2699         }
2700
2701         write_backup_supers(fp, (u8 *)buf);
2702
2703 out:
2704         if (fp != -1)
2705                 close(fp);
2706         return ret;
2707 }
2708
/*
 * Print the command line help text and terminate with @ret as the
 * process exit code (0 for --help, nonzero for usage errors).
 */
static void print_usage(int ret)
{
	printf("usage: btrfs-image [options] source target\n"
	       "\t-r      \trestore metadump image\n"
	       "\t-c value\tcompression level (0 ~ 9)\n"
	       "\t-t value\tnumber of threads (1 ~ 32)\n"
	       "\t-o      \tdon't mess with the chunk tree when restoring\n"
	       "\t-s      \tsanitize file names, use once to just use garbage, use twice if you want crc collisions\n"
	       "\t-w      \twalk all trees instead of using extent tree, do this if your extent tree is broken\n"
	       "\t-m       \trestore for multiple devices\n"
	       "\n"
	       "\tIn the dump mode, source is the btrfs device and target is the output file (use '-' for stdout).\n"
	       "\tIn the restore mode, source is the dumped image and target is the btrfs device/file.\n");
	exit(ret);
}
2724
2725 int main(int argc, char *argv[])
2726 {
2727         char *source;
2728         char *target;
2729         u64 num_threads = 0;
2730         u64 compress_level = 0;
2731         int create = 1;
2732         int old_restore = 0;
2733         int walk_trees = 0;
2734         int multi_devices = 0;
2735         int ret;
2736         int sanitize = 0;
2737         int dev_cnt = 0;
2738         int usage_error = 0;
2739         FILE *out;
2740
2741         while (1) {
2742                 static const struct option long_options[] = {
2743                         { "help", no_argument, NULL, GETOPT_VAL_HELP},
2744                         { NULL, 0, NULL, 0 }
2745                 };
2746                 int c = getopt_long(argc, argv, "rc:t:oswm", long_options, NULL);
2747                 if (c < 0)
2748                         break;
2749                 switch (c) {
2750                 case 'r':
2751                         create = 0;
2752                         break;
2753                 case 't':
2754                         num_threads = arg_strtou64(optarg);
2755                         if (num_threads > MAX_WORKER_THREADS) {
2756                                 error("number of threads out of range: %llu > %d",
2757                                         (unsigned long long)num_threads,
2758                                         MAX_WORKER_THREADS);
2759                                 return 1;
2760                         }
2761                         break;
2762                 case 'c':
2763                         compress_level = arg_strtou64(optarg);
2764                         if (compress_level > 9) {
2765                                 error("compression level out of range: %llu",
2766                                         (unsigned long long)compress_level);
2767                                 return 1;
2768                         }
2769                         break;
2770                 case 'o':
2771                         old_restore = 1;
2772                         break;
2773                 case 's':
2774                         sanitize++;
2775                         break;
2776                 case 'w':
2777                         walk_trees = 1;
2778                         break;
2779                 case 'm':
2780                         create = 0;
2781                         multi_devices = 1;
2782                         break;
2783                         case GETOPT_VAL_HELP:
2784                 default:
2785                         print_usage(c != GETOPT_VAL_HELP);
2786                 }
2787         }
2788
2789         set_argv0(argv);
2790         if (check_argc_min(argc - optind, 2))
2791                 print_usage(1);
2792
2793         dev_cnt = argc - optind - 1;
2794
2795         if (create) {
2796                 if (old_restore) {
2797                         error(
2798                         "create and restore cannot be used at the same time");
2799                         usage_error++;
2800                 }
2801         } else {
2802                 if (walk_trees || sanitize || compress_level) {
2803                         error(
2804                         "useing -w, -s, -c options for restore makes no sense");
2805                         usage_error++;
2806                 }
2807                 if (multi_devices && dev_cnt < 2) {
2808                         error("not enough devices specified for -m option");
2809                         usage_error++;
2810                 }
2811                 if (!multi_devices && dev_cnt != 1) {
2812                         error("accepts only 1 device without -m option");
2813                         usage_error++;
2814                 }
2815         }
2816
2817         if (usage_error)
2818                 print_usage(1);
2819
2820         source = argv[optind];
2821         target = argv[optind + 1];
2822
2823         if (create && !strcmp(target, "-")) {
2824                 out = stdout;
2825         } else {
2826                 out = fopen(target, "w+");
2827                 if (!out) {
2828                         error("unable to create target file %s", target);
2829                         exit(1);
2830                 }
2831         }
2832
2833         if (compress_level > 0 || create == 0) {
2834                 if (num_threads == 0) {
2835                         long tmp = sysconf(_SC_NPROCESSORS_ONLN);
2836
2837                         if (tmp <= 0)
2838                                 tmp = 1;
2839                         num_threads = tmp;
2840                 }
2841         } else {
2842                 num_threads = 0;
2843         }
2844
2845         if (create) {
2846                 ret = check_mounted(source);
2847                 if (ret < 0) {
2848                         warning("unable to check mount status of: %s",
2849                                         strerror(-ret));
2850                 } else if (ret) {
2851                         warning("%s already mounted, results may be inaccurate",
2852                                         source);
2853                 }
2854
2855                 ret = create_metadump(source, out, num_threads,
2856                                       compress_level, sanitize, walk_trees);
2857         } else {
2858                 ret = restore_metadump(source, out, old_restore, num_threads,
2859                                        0, target, multi_devices);
2860         }
2861         if (ret) {
2862                 error("%s failed: %s", (create) ? "create" : "restore",
2863                        strerror(errno));
2864                 goto out;
2865         }
2866
2867          /* extended support for multiple devices */
2868         if (!create && multi_devices) {
2869                 struct btrfs_fs_info *info;
2870                 u64 total_devs;
2871                 int i;
2872
2873                 info = open_ctree_fs_info(target, 0, 0, 0,
2874                                           OPEN_CTREE_PARTIAL |
2875                                           OPEN_CTREE_RESTORE);
2876                 if (!info) {
2877                         error("open ctree failed at %s", target);
2878                         return 1;
2879                 }
2880
2881                 total_devs = btrfs_super_num_devices(info->super_copy);
2882                 if (total_devs != dev_cnt) {
2883                         error("it needs %llu devices but has only %d",
2884                                 total_devs, dev_cnt);
2885                         close_ctree(info->chunk_root);
2886                         goto out;
2887                 }
2888
2889                 /* update super block on other disks */
2890                 for (i = 2; i <= dev_cnt; i++) {
2891                         ret = update_disk_super_on_device(info,
2892                                         argv[optind + i], (u64)i);
2893                         if (ret) {
2894                                 error("update disk superblock failed devid %d: %d",
2895                                         i, ret);
2896                                 close_ctree(info->chunk_root);
2897                                 exit(1);
2898                         }
2899                 }
2900
2901                 close_ctree(info->chunk_root);
2902
2903                 /* fix metadata block to map correct chunk */
2904                 ret = restore_metadump(source, out, 0, num_threads, 1,
2905                                        target, 1);
2906                 if (ret) {
2907                         error("unable to fixup metadump: %d", ret);
2908                         exit(1);
2909                 }
2910         }
2911 out:
2912         if (out == stdout) {
2913                 fflush(out);
2914         } else {
2915                 fclose(out);
2916                 if (ret && create) {
2917                         int unlink_ret;
2918
2919                         unlink_ret = unlink(target);
2920                         if (unlink_ret)
2921                                 error("unlink output file %s failed: %s",
2922                                                 target, strerror(errno));
2923                 }
2924         }
2925
2926         btrfs_close_all_devices();
2927
2928         return !!ret;
2929 }