btrfs-progs: Refactor nodesize users in image/main.c
[platform/upstream/btrfs-progs.git] / image / main.c
1 /*
2  * Copyright (C) 2008 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <pthread.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <fcntl.h>
25 #include <unistd.h>
26 #include <dirent.h>
27 #include <zlib.h>
28 #include <getopt.h>
29
30 #include "kerncompat.h"
31 #include "crc32c.h"
32 #include "ctree.h"
33 #include "disk-io.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "volumes.h"
37 #include "extent_io.h"
38 #include "help.h"
39
40 #define HEADER_MAGIC            0xbd5c25e27295668bULL
41 #define MAX_PENDING_SIZE        (256 * 1024)
42 #define BLOCK_SIZE              1024
43 #define BLOCK_MASK              (BLOCK_SIZE - 1)
44
45 #define COMPRESS_NONE           0
46 #define COMPRESS_ZLIB           1
47
48 #define MAX_WORKER_THREADS      (32)
49
/* Image stream format: per-extent index entry inside a cluster block. */
struct meta_cluster_item {
        __le64 bytenr;  /* logical start of the dumped extent */
        __le32 size;    /* stored (possibly compressed) byte count */
} __attribute__ ((__packed__));
54
/* Image stream format: header at the front of every cluster block. */
struct meta_cluster_header {
        __le64 magic;   /* HEADER_MAGIC, identifies a metadump stream */
        __le64 bytenr;  /* stream offset of this cluster */
        __le32 nritems; /* number of valid entries in items[] */
        u8 compress;    /* COMPRESS_NONE or COMPRESS_ZLIB */
} __attribute__ ((__packed__));
61
/*
 * cluster header + index items + buffers
 *
 * A cluster occupies exactly BLOCK_SIZE bytes (see the union in struct
 * metadump_struct); items[] holds up to ITEMS_PER_CLUSTER entries.
 */
struct meta_cluster {
        struct meta_cluster_header header;
        struct meta_cluster_item items[];
} __attribute__ ((__packed__));
67
68 #define ITEMS_PER_CLUSTER ((BLOCK_SIZE - sizeof(struct meta_cluster)) / \
69                            sizeof(struct meta_cluster_item))
70
/* Mapping of one chunk between logical and physical addresses. */
struct fs_chunk {
        u64 logical;            /* logical start of the chunk */
        u64 physical;           /* physical start (first copy) */
        /*
         * physical_dup only stores the additional physical for
         * BTRFS_BLOCK_GROUP_DUP; currently restore only supports single
         * and DUP.
         * TODO: modify this structure and the functions related to this
         * structure to support RAID*
         */
        u64 physical_dup;
        u64 bytes;              /* chunk length in bytes */
        struct rb_node l;       /* node in chunk_tree, ordered by logical */
        struct rb_node p;       /* node in physical_tree, ordered by physical */
        struct list_head list;
};
86
/* One unit of work handed to the dump worker threads. */
struct async_work {
        struct list_head list;          /* pending-work queue (md->list) */
        struct list_head ordered;       /* write-order queue (md->ordered) */
        u64 start;                      /* logical start of the extent run */
        u64 size;                       /* uncompressed byte count */
        u8 *buffer;                     /* raw, or compressed by a worker */
        size_t bufsize;                 /* valid bytes in buffer */
        int error;                      /* set if compression failed */
};
96
/* State for dumping a filesystem into a metadump image stream. */
struct metadump_struct {
        struct btrfs_root *root;
        FILE *out;

        /* current cluster; the union pads it to one full BLOCK_SIZE block */
        union {
                struct meta_cluster cluster;
                char meta_cluster_bytes[BLOCK_SIZE];
        };

        pthread_t threads[MAX_WORKER_THREADS];
        size_t num_threads;
        pthread_mutex_t mutex;
        pthread_cond_t cond;
        struct rb_root name_tree;       /* cache of sanitized names */

        struct list_head list;          /* work queued for the workers */
        struct list_head ordered;       /* all work, in on-disk write order */
        size_t num_items;               /* items queued for this cluster */
        size_t num_ready;               /* items finished by the workers */

        u64 pending_start;              /* start of the extent run being batched */
        u64 pending_size;               /* bytes batched so far */

        int compress_level;             /* zlib level; 0 disables compression */
        int done;                       /* tells the workers to exit */
        int data;                       /* also dump data extents */
        int sanitize_names;             /* 1: garbage names, >1: crc32c collisions */

        int error;                      /* first error seen by any worker */
};
127
/* Cache entry mapping an original name to its sanitized substitute. */
struct name {
        struct rb_node n;       /* node in metadump_struct.name_tree */
        char *val;              /* original name (not NUL terminated) */
        char *sub;              /* substitute of the same length */
        u32 len;                /* length of both val and sub */
};
134
/* State for restoring a metadump image stream onto a device. */
struct mdrestore_struct {
        FILE *in;
        FILE *out;

        pthread_t threads[MAX_WORKER_THREADS];
        size_t num_threads;
        pthread_mutex_t mutex;
        pthread_cond_t cond;

        struct rb_root chunk_tree;      /* fs_chunks ordered by logical */
        struct rb_root physical_tree;   /* fs_chunks ordered by physical */
        struct list_head list;
        struct list_head overlapping_chunks;
        size_t num_items;
        u32 nodesize;
        u64 devid;
        u64 alloced_chunks;
        u64 last_physical_offset;
        u8 uuid[BTRFS_UUID_SIZE];
        u8 fsid[BTRFS_FSID_SIZE];

        int compress_method;    /* COMPRESS_* value; presumably taken from
                                 * the cluster header - confirm in restore path */
        int done;
        int error;
        int old_restore;
        int fixup_offset;
        int multi_devices;
        int clear_space_cache;
        struct btrfs_fs_info *info;
};
165
166 static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
167                                    u64 search, u64 cluster_bytenr);
168 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size);
169
/*
 * Recompute the metadata checksum of a block image in place.
 *
 * The CRC covers everything after the checksum area (the first
 * BTRFS_CSUM_SIZE bytes of the block); only the leading BTRFS_CRC32_SIZE
 * bytes of that area are rewritten, which is all crc32c needs.
 */
static void csum_block(u8 *buf, size_t len)
{
        u8 result[BTRFS_CRC32_SIZE];
        u32 crc = ~(u32)0;
        crc = crc32c(crc, buf + BTRFS_CSUM_SIZE, len - BTRFS_CSUM_SIZE);
        btrfs_csum_final(crc, result);
        memcpy(buf, result, BTRFS_CRC32_SIZE);
}
178
179 static int has_name(struct btrfs_key *key)
180 {
181         switch (key->type) {
182         case BTRFS_DIR_ITEM_KEY:
183         case BTRFS_DIR_INDEX_KEY:
184         case BTRFS_INODE_REF_KEY:
185         case BTRFS_INODE_EXTREF_KEY:
186         case BTRFS_XATTR_ITEM_KEY:
187                 return 1;
188         default:
189                 break;
190         }
191
192         return 0;
193 }
194
195 static char *generate_garbage(u32 name_len)
196 {
197         char *buf = malloc(name_len);
198         int i;
199
200         if (!buf)
201                 return NULL;
202
203         for (i = 0; i < name_len; i++) {
204                 char c = rand_range(94) + 33;
205
206                 if (c == '/')
207                         c++;
208                 buf[i] = c;
209         }
210
211         return buf;
212 }
213
214 static int name_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
215 {
216         struct name *entry = rb_entry(a, struct name, n);
217         struct name *ins = rb_entry(b, struct name, n);
218         u32 len;
219
220         len = min(ins->len, entry->len);
221         return memcmp(ins->val, entry->val, len);
222 }
223
224 static int chunk_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
225 {
226         struct fs_chunk *entry = rb_entry(a, struct fs_chunk, l);
227         struct fs_chunk *ins = rb_entry(b, struct fs_chunk, l);
228
229         if (fuzz && ins->logical >= entry->logical &&
230             ins->logical < entry->logical + entry->bytes)
231                 return 0;
232
233         if (ins->logical < entry->logical)
234                 return -1;
235         else if (ins->logical > entry->logical)
236                 return 1;
237         return 0;
238 }
239
240 static int physical_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
241 {
242         struct fs_chunk *entry = rb_entry(a, struct fs_chunk, p);
243         struct fs_chunk *ins = rb_entry(b, struct fs_chunk, p);
244
245         if (fuzz && ins->physical >= entry->physical &&
246             ins->physical < entry->physical + entry->bytes)
247                 return 0;
248
249         if (fuzz && entry->physical >= ins->physical &&
250             entry->physical < ins->physical + ins->bytes)
251                 return 0;
252
253         if (ins->physical < entry->physical)
254                 return -1;
255         else if (ins->physical > entry->physical)
256                 return 1;
257         return 0;
258 }
259
/*
 * Insert a node into an rb tree ordered by cmp.  The comparator is
 * invoked with fuzz == 1, so for the chunk trees an insert that overlaps
 * an existing entry compares equal and trips the BUG() below - duplicates
 * and overlaps are not allowed here.
 */
static void tree_insert(struct rb_root *root, struct rb_node *ins,
                        int (*cmp)(struct rb_node *a, struct rb_node *b,
                                   int fuzz))
{
        struct rb_node ** p = &root->rb_node;
        struct rb_node * parent = NULL;
        int dir;

        while(*p) {
                parent = *p;

                dir = cmp(*p, ins, 1);
                if (dir < 0)
                        p = &(*p)->rb_left;
                else if (dir > 0)
                        p = &(*p)->rb_right;
                else
                        BUG();
        }

        rb_link_node(ins, parent, p);
        rb_insert_color(ins, root);
}
283
284 static struct rb_node *tree_search(struct rb_root *root,
285                                    struct rb_node *search,
286                                    int (*cmp)(struct rb_node *a,
287                                               struct rb_node *b, int fuzz),
288                                    int fuzz)
289 {
290         struct rb_node *n = root->rb_node;
291         int dir;
292
293         while (n) {
294                 dir = cmp(n, search, fuzz);
295                 if (dir < 0)
296                         n = n->rb_left;
297                 else if (dir > 0)
298                         n = n->rb_right;
299                 else
300                         return n;
301         }
302
303         return NULL;
304 }
305
/*
 * Translate a logical address to a physical one via the restored chunk
 * tree.
 *
 * *size is clamped so the returned run does not cross the end of the
 * containing chunk.  If physical_dup is non-NULL it receives the second
 * copy's physical address (DUP profile) or 0.  The super block offset,
 * and any address with no matching chunk, translate to themselves.
 */
static u64 logical_to_physical(struct mdrestore_struct *mdres, u64 logical,
                               u64 *size, u64 *physical_dup)
{
        struct fs_chunk *fs_chunk;
        struct rb_node *entry;
        struct fs_chunk search;
        u64 offset;

        if (logical == BTRFS_SUPER_INFO_OFFSET)
                return logical;

        search.logical = logical;
        entry = tree_search(&mdres->chunk_tree, &search.l, chunk_cmp, 1);
        if (!entry) {
                /* only warn for on-disk images; a pipe may legitimately miss */
                if (mdres->in != stdin)
                        warning("cannot find a chunk, using logical");
                return logical;
        }
        fs_chunk = rb_entry(entry, struct fs_chunk, l);
        if (fs_chunk->logical > logical || fs_chunk->logical + fs_chunk->bytes < logical)
                BUG();
        offset = search.logical - fs_chunk->logical;

        if (physical_dup) {
                /* Only in dup case, physical_dup is not equal to 0 */
                if (fs_chunk->physical_dup)
                        *physical_dup = fs_chunk->physical_dup + offset;
                else
                        *physical_dup = 0;
        }

        *size = min(*size, fs_chunk->bytes + fs_chunk->logical - logical);
        return fs_chunk->physical + offset;
}
340
341
/*
 * Return a sanitized substitute for name with the same length and the
 * same crc32c, so hash-based directory indexes still match.  Results are
 * cached in md->name_tree.  Takes ownership of name: it is either stored
 * in the cache or freed.  Returns NULL on allocation failure.
 */
static char *find_collision(struct metadump_struct *md, char *name,
                            u32 name_len)
{
        struct name *val;
        struct rb_node *entry;
        struct name tmp;
        unsigned long checksum;
        int found = 0;
        int i;

        /* already sanitized this exact name? */
        tmp.val = name;
        tmp.len = name_len;
        entry = tree_search(&md->name_tree, &tmp.n, name_cmp, 0);
        if (entry) {
                val = rb_entry(entry, struct name, n);
                free(name);
                return val->sub;
        }

        val = malloc(sizeof(struct name));
        if (!val) {
                error("cannot sanitize name, not enough memory");
                free(name);
                return NULL;
        }

        memset(val, 0, sizeof(*val));

        val->val = name;
        val->len = name_len;
        val->sub = malloc(name_len);
        if (!val->sub) {
                error("cannot sanitize name, not enough memory");
                free(val);
                free(name);
                return NULL;
        }

        /* both sides use the same ~1 seed, so the CRCs are comparable */
        checksum = crc32c(~1, val->val, name_len);
        memset(val->sub, ' ', name_len);
        i = 0;
        /*
         * Odometer search: step candidate strings through the printable
         * range (' '..DEL, skipping '/') until one has the same crc32c
         * but different bytes.  Position i is the digit being advanced;
         * a carry resets all lower positions back to ' '.
         */
        while (1) {
                if (crc32c(~1, val->sub, name_len) == checksum &&
                    memcmp(val->sub, val->val, val->len)) {
                        found = 1;
                        break;
                }

                if (val->sub[i] == 127) {
                        /* carry: find the next position that can advance */
                        do {
                                i++;
                                if (i >= name_len)
                                        break;
                        } while (val->sub[i] == 127);

                        if (i >= name_len)
                                break;
                        val->sub[i]++;
                        if (val->sub[i] == '/')
                                val->sub[i]++;
                        memset(val->sub, ' ', i);
                        i = 0;
                        continue;
                } else {
                        val->sub[i]++;
                        if (val->sub[i] == '/')
                                val->sub[i]++;
                }
        }

        if (!found) {
                /* search space exhausted: fall back to random garbage */
                warning(
"cannot find a hash collision for '%.*s', generating garbage, it won't match indexes",
                        val->len, val->val);
                for (i = 0; i < name_len; i++) {
                        char c = rand_range(94) + 33;

                        if (c == '/')
                                c++;
                        val->sub[i] = c;
                }
        }

        tree_insert(&md->name_tree, &val->n, name_cmp);
        return val->sub;
}
428
/*
 * Rewrite every name stored in a dir item (several entries may share one
 * item) with a sanitized substitute of the same length.  In collision
 * mode (sanitize_names > 1) the substitute is owned by the name cache and
 * must not be freed here; in garbage mode (== 1) it is a fresh
 * allocation that we free after use.
 */
static void sanitize_dir_item(struct metadump_struct *md, struct extent_buffer *eb,
                              int slot)
{
        struct btrfs_dir_item *dir_item;
        char *buf;
        char *garbage;
        unsigned long name_ptr;
        u32 total_len;
        u32 cur = 0;
        u32 this_len;
        u32 name_len;
        int free_garbage = (md->sanitize_names == 1);

        dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
        total_len = btrfs_item_size_nr(eb, slot);
        while (cur < total_len) {
                this_len = sizeof(*dir_item) +
                        btrfs_dir_name_len(eb, dir_item) +
                        btrfs_dir_data_len(eb, dir_item);
                name_ptr = (unsigned long)(dir_item + 1);
                name_len = btrfs_dir_name_len(eb, dir_item);

                if (md->sanitize_names > 1) {
                        buf = malloc(name_len);
                        if (!buf) {
                                error("cannot sanitize name, not enough memory");
                                return;
                        }
                        read_extent_buffer(eb, buf, name_ptr, name_len);
                        /* find_collision takes ownership of buf */
                        garbage = find_collision(md, buf, name_len);
                } else {
                        garbage = generate_garbage(name_len);
                }
                if (!garbage) {
                        error("cannot sanitize name, not enough memory");
                        return;
                }
                write_extent_buffer(eb, garbage, name_ptr, name_len);
                cur += this_len;
                dir_item = (struct btrfs_dir_item *)((char *)dir_item +
                                                     this_len);
                if (free_garbage)
                        free(garbage);
        }
}
474
/*
 * Rewrite the names embedded in an inode ref (ext == 0) or extref
 * (ext == 1) item with sanitized substitutes of the same length.  The
 * same ownership rule as sanitize_dir_item() applies: cached collision
 * substitutes are not freed, garbage-mode substitutes are.
 */
static void sanitize_inode_ref(struct metadump_struct *md,
                               struct extent_buffer *eb, int slot, int ext)
{
        struct btrfs_inode_extref *extref;
        struct btrfs_inode_ref *ref;
        char *garbage, *buf;
        unsigned long ptr;
        unsigned long name_ptr;
        u32 item_size;
        u32 cur_offset = 0;
        int len;
        int free_garbage = (md->sanitize_names == 1);

        item_size = btrfs_item_size_nr(eb, slot);
        ptr = btrfs_item_ptr_offset(eb, slot);
        while (cur_offset < item_size) {
                if (ext) {
                        extref = (struct btrfs_inode_extref *)(ptr +
                                                               cur_offset);
                        name_ptr = (unsigned long)(&extref->name);
                        len = btrfs_inode_extref_name_len(eb, extref);
                        cur_offset += sizeof(*extref);
                } else {
                        ref = (struct btrfs_inode_ref *)(ptr + cur_offset);
                        len = btrfs_inode_ref_name_len(eb, ref);
                        name_ptr = (unsigned long)(ref + 1);
                        cur_offset += sizeof(*ref);
                }
                /* the name trails the fixed-size header */
                cur_offset += len;

                if (md->sanitize_names > 1) {
                        buf = malloc(len);
                        if (!buf) {
                                error("cannot sanitize name, not enough memory");
                                return;
                        }
                        read_extent_buffer(eb, buf, name_ptr, len);
                        /* find_collision takes ownership of buf */
                        garbage = find_collision(md, buf, len);
                } else {
                        garbage = generate_garbage(len);
                }

                if (!garbage) {
                        error("cannot sanitize name, not enough memory");
                        return;
                }
                write_extent_buffer(eb, garbage, name_ptr, len);
                if (free_garbage)
                        free(garbage);
        }
}
526
527 static void sanitize_xattr(struct metadump_struct *md,
528                            struct extent_buffer *eb, int slot)
529 {
530         struct btrfs_dir_item *dir_item;
531         unsigned long data_ptr;
532         u32 data_len;
533
534         dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
535         data_len = btrfs_dir_data_len(eb, dir_item);
536
537         data_ptr = (unsigned long)((char *)(dir_item + 1) +
538                                    btrfs_dir_name_len(eb, dir_item));
539         memset_extent_buffer(eb, 0, data_ptr, data_len);
540 }
541
/*
 * Sanitize the names in one leaf item: copy the source block into a
 * scratch extent buffer, rewrite the names there according to the key
 * type, then copy the whole block image back into dst.  On allocation
 * failure the item is left unsanitized (an error is printed).
 */
static void sanitize_name(struct metadump_struct *md, u8 *dst,
                          struct extent_buffer *src, struct btrfs_key *key,
                          int slot)
{
        struct extent_buffer *eb;

        eb = alloc_dummy_eb(src->start, src->len);
        if (!eb) {
                error("cannot sanitize name, not enough memory");
                return;
        }

        memcpy(eb->data, src->data, src->len);

        switch (key->type) {
        case BTRFS_DIR_ITEM_KEY:
        case BTRFS_DIR_INDEX_KEY:
                sanitize_dir_item(md, eb, slot);
                break;
        case BTRFS_INODE_REF_KEY:
                sanitize_inode_ref(md, eb, slot, 0);
                break;
        case BTRFS_INODE_EXTREF_KEY:
                sanitize_inode_ref(md, eb, slot, 1);
                break;
        case BTRFS_XATTR_ITEM_KEY:
                sanitize_xattr(md, eb, slot);
                break;
        default:
                break;
        }

        memcpy(dst, eb->data, eb->len);
        /* dummy ebs are plain allocations, plain free() is fine */
        free(eb);
}
577
/*
 * zero inline extents and csum items
 *
 * Walk the items of leaf src and blank, in the copy at dst, any payload
 * that must not leak into the image: checksum items, inline file data,
 * and (when enabled) file names via sanitize_name().
 */
static void zero_items(struct metadump_struct *md, u8 *dst,
                       struct extent_buffer *src)
{
        struct btrfs_file_extent_item *fi;
        struct btrfs_item *item;
        struct btrfs_key key;
        u32 nritems = btrfs_header_nritems(src);
        size_t size;
        unsigned long ptr;
        int i, extent_type;

        for (i = 0; i < nritems; i++) {
                item = btrfs_item_nr(i);
                btrfs_item_key_to_cpu(src, &key, i);
                /* checksum items: zero the whole payload */
                if (key.type == BTRFS_CSUM_ITEM_KEY) {
                        size = btrfs_item_size_nr(src, i);
                        memset(dst + btrfs_leaf_data(src) +
                               btrfs_item_offset_nr(src, i), 0, size);
                        continue;
                }

                if (md->sanitize_names && has_name(&key)) {
                        sanitize_name(md, dst, src, &key, i);
                        continue;
                }

                if (key.type != BTRFS_EXTENT_DATA_KEY)
                        continue;

                /* only inline extents carry file data inside the leaf */
                fi = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
                extent_type = btrfs_file_extent_type(src, fi);
                if (extent_type != BTRFS_FILE_EXTENT_INLINE)
                        continue;

                ptr = btrfs_file_extent_inline_start(fi);
                size = btrfs_file_extent_inline_item_len(src, item);
                memset(dst + ptr, 0, size);
        }
}
620
/*
 * copy buffer and zero useless data in the buffer
 *
 * Copy tree block src into dst, blank the regions that carry no needed
 * information (the unused gap of a leaf or node, plus whatever
 * zero_items() drops), and recompute the block checksum.  The super
 * block copy is passed through untouched.
 */
static void copy_buffer(struct metadump_struct *md, u8 *dst,
                        struct extent_buffer *src)
{
        int level;
        size_t size;
        u32 nritems;

        memcpy(dst, src->data, src->len);
        if (src->start == BTRFS_SUPER_INFO_OFFSET)
                return;

        level = btrfs_header_level(src);
        nritems = btrfs_header_nritems(src);

        if (nritems == 0) {
                /* empty block: keep only the header */
                size = sizeof(struct btrfs_header);
                memset(dst + size, 0, src->len - size);
        } else if (level == 0) {
                /* leaf: zero the gap between the item array and item data */
                size = btrfs_leaf_data(src) +
                        btrfs_item_offset_nr(src, nritems - 1) -
                        btrfs_item_nr_offset(nritems);
                memset(dst + btrfs_item_nr_offset(nritems), 0, size);
                zero_items(md, dst, src);
        } else {
                /* node: zero everything after the used key pointers */
                size = offsetof(struct btrfs_node, ptrs) +
                        sizeof(struct btrfs_key_ptr) * nritems;
                memset(dst + size, 0, src->len - size);
        }
        csum_block(dst, src->len);
}
654
/*
 * Worker thread: pull async_work items off md->list and, when a
 * compression level is set, replace the raw buffer with a
 * zlib-compressed copy (compression failure sets async->error).  Each
 * finished item bumps md->num_ready so write_buffers() can wait for the
 * cluster to complete; the items remain reachable via md->ordered.
 * Exits when md->done is set and the queue drains; an allocation failure
 * records -ENOMEM in md->error and ends just this thread.
 */
static void *dump_worker(void *data)
{
        struct metadump_struct *md = (struct metadump_struct *)data;
        struct async_work *async;
        int ret;

        while (1) {
                pthread_mutex_lock(&md->mutex);
                while (list_empty(&md->list)) {
                        if (md->done) {
                                pthread_mutex_unlock(&md->mutex);
                                goto out;
                        }
                        pthread_cond_wait(&md->cond, &md->mutex);
                }
                async = list_entry(md->list.next, struct async_work, list);
                list_del_init(&async->list);
                pthread_mutex_unlock(&md->mutex);

                if (md->compress_level > 0) {
                        u8 *orig = async->buffer;

                        /* compressBound() is the worst-case output size */
                        async->bufsize = compressBound(async->size);
                        async->buffer = malloc(async->bufsize);
                        if (!async->buffer) {
                                error("not enough memory for async buffer");
                                pthread_mutex_lock(&md->mutex);
                                if (!md->error)
                                        md->error = -ENOMEM;
                                pthread_mutex_unlock(&md->mutex);
                                pthread_exit(NULL);
                        }

                        ret = compress2(async->buffer,
                                         (unsigned long *)&async->bufsize,
                                         orig, async->size, md->compress_level);

                        if (ret != Z_OK)
                                async->error = 1;

                        free(orig);
                }

                pthread_mutex_lock(&md->mutex);
                md->num_ready++;
                pthread_mutex_unlock(&md->mutex);
        }
out:
        pthread_exit(NULL);
}
705
706 static void meta_cluster_init(struct metadump_struct *md, u64 start)
707 {
708         struct meta_cluster_header *header;
709
710         md->num_items = 0;
711         md->num_ready = 0;
712         header = &md->cluster.header;
713         header->magic = cpu_to_le64(HEADER_MAGIC);
714         header->bytenr = cpu_to_le64(start);
715         header->nritems = cpu_to_le32(0);
716         header->compress = md->compress_level > 0 ?
717                            COMPRESS_ZLIB : COMPRESS_NONE;
718 }
719
720 static void metadump_destroy(struct metadump_struct *md, int num_threads)
721 {
722         int i;
723         struct rb_node *n;
724
725         pthread_mutex_lock(&md->mutex);
726         md->done = 1;
727         pthread_cond_broadcast(&md->cond);
728         pthread_mutex_unlock(&md->mutex);
729
730         for (i = 0; i < num_threads; i++)
731                 pthread_join(md->threads[i], NULL);
732
733         pthread_cond_destroy(&md->cond);
734         pthread_mutex_destroy(&md->mutex);
735
736         while ((n = rb_first(&md->name_tree))) {
737                 struct name *name;
738
739                 name = rb_entry(n, struct name, n);
740                 rb_erase(n, &md->name_tree);
741                 free(name->val);
742                 free(name->sub);
743                 free(name);
744         }
745 }
746
747 static int metadump_init(struct metadump_struct *md, struct btrfs_root *root,
748                          FILE *out, int num_threads, int compress_level,
749                          int sanitize_names)
750 {
751         int i, ret = 0;
752
753         memset(md, 0, sizeof(*md));
754         INIT_LIST_HEAD(&md->list);
755         INIT_LIST_HEAD(&md->ordered);
756         md->root = root;
757         md->out = out;
758         md->pending_start = (u64)-1;
759         md->compress_level = compress_level;
760         md->sanitize_names = sanitize_names;
761         if (sanitize_names > 1)
762                 crc32c_optimization_init();
763
764         md->name_tree.rb_node = NULL;
765         md->num_threads = num_threads;
766         pthread_cond_init(&md->cond, NULL);
767         pthread_mutex_init(&md->mutex, NULL);
768         meta_cluster_init(md, 0);
769
770         if (!num_threads)
771                 return 0;
772
773         for (i = 0; i < num_threads; i++) {
774                 ret = pthread_create(md->threads + i, NULL, dump_worker, md);
775                 if (ret)
776                         break;
777         }
778
779         if (ret)
780                 metadump_destroy(md, i + 1);
781
782         return ret;
783 }
784
785 static int write_zero(FILE *out, size_t size)
786 {
787         static char zero[BLOCK_SIZE];
788         return fwrite(zero, size, 1, out);
789 }
790
/*
 * Flush the current cluster to the image stream: wait for the workers to
 * finish every queued item, write the header + index block, write the
 * item payloads in order, then pad the stream to the next BLOCK_SIZE
 * boundary.
 *
 * Called with md->mutex held (the wait loop drops and retakes it).
 * Returns 0 or a negative errno; *next receives the stream offset just
 * past the padding, where the next cluster starts.
 */
static int write_buffers(struct metadump_struct *md, u64 *next)
{
        struct meta_cluster_header *header = &md->cluster.header;
        struct meta_cluster_item *item;
        struct async_work *async;
        u64 bytenr = 0;
        u32 nritems = 0;
        int ret;
        int err = 0;

        if (list_empty(&md->ordered))
                goto out;

        /* wait until all buffers are compressed */
        while (!err && md->num_items > md->num_ready) {
                struct timespec ts = {
                        .tv_sec = 0,
                        .tv_nsec = 10000000,
                };
                pthread_mutex_unlock(&md->mutex);
                nanosleep(&ts, NULL);
                pthread_mutex_lock(&md->mutex);
                err = md->error;
        }

        if (err) {
                error("one of the threads failed: %s", strerror(-err));
                goto out;
        }

        /* setup and write index block */
        list_for_each_entry(async, &md->ordered, ordered) {
                item = &md->cluster.items[nritems];
                item->bytenr = cpu_to_le64(async->start);
                item->size = cpu_to_le32(async->bufsize);
                nritems++;
        }
        header->nritems = cpu_to_le32(nritems);

        ret = fwrite(&md->cluster, BLOCK_SIZE, 1, md->out);
        if (ret != 1) {
                error("unable to write out cluster: %s", strerror(errno));
                return -errno;
        }

        /* write buffers */
        bytenr += le64_to_cpu(header->bytenr) + BLOCK_SIZE;
        while (!list_empty(&md->ordered)) {
                async = list_entry(md->ordered.next, struct async_work,
                                   ordered);
                list_del_init(&async->ordered);

                bytenr += async->bufsize;
                if (!err)
                        ret = fwrite(async->buffer, async->bufsize, 1,
                                     md->out);
                /*
                 * NOTE(review): once err is set, fwrite is skipped but ret
                 * keeps its previous value; the error branch below then
                 * re-reports a stale errno.  Harmless (err stays set), but
                 * worth knowing when reading error output.
                 */
                if (ret != 1) {
                        error("unable to write out cluster: %s",
                                strerror(errno));
                        err = -errno;
                        ret = 0;
                }

                free(async->buffer);
                free(async);
        }

        /* zero unused space in the last block */
        if (!err && bytenr & BLOCK_MASK) {
                size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);

                bytenr += size;
                ret = write_zero(md->out, size);
                if (ret != 1) {
                        error("unable to zero out buffer: %s",
                                strerror(errno));
                        err = -errno;
                }
        }
out:
        *next = bytenr;
        return err;
}
874
/*
 * Read async->size bytes at logical async->start into async->buffer,
 * trying every available mirror in turn, just like read_tree_block().
 * Progress (offset / bytes_left) carries over between mirrors, so a
 * later mirror only re-reads the part a previous one failed to supply.
 * Returns 0 on success, -EIO if no mirror could provide all the data.
 */
static int read_data_extent(struct metadump_struct *md,
                            struct async_work *async)
{
        struct btrfs_root *root = md->root;
        u64 bytes_left = async->size;
        u64 logical = async->start;
        u64 offset = 0;
        u64 read_len;
        int num_copies;
        int cur_mirror;
        int ret;

        num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, logical,
                                      bytes_left);

        /* Try our best to read data, just like read_tree_block() */
        for (cur_mirror = 0; cur_mirror < num_copies; cur_mirror++) {
                while (bytes_left) {
                        /* read_extent_data may shorten read_len to what it got */
                        read_len = bytes_left;
                        ret = read_extent_data(root,
                                        (char *)(async->buffer + offset),
                                        logical, &read_len, cur_mirror);
                        if (ret < 0)
                                break;
                        offset += read_len;
                        logical += read_len;
                        bytes_left -= read_len;
                }
        }
        if (bytes_left)
                return -EIO;
        return 0;
}
908
909 static int get_dev_fd(struct btrfs_root *root)
910 {
911         struct btrfs_device *dev;
912
913         dev = list_first_entry(&root->fs_info->fs_devices->devices,
914                                struct btrfs_device, dev_list);
915         return dev->fd;
916 }
917
918 static int flush_pending(struct metadump_struct *md, int done)
919 {
920         struct async_work *async = NULL;
921         struct extent_buffer *eb;
922         u64 blocksize = md->root->fs_info->nodesize;
923         u64 start = 0;
924         u64 size;
925         size_t offset;
926         int ret = 0;
927
928         if (md->pending_size) {
929                 async = calloc(1, sizeof(*async));
930                 if (!async)
931                         return -ENOMEM;
932
933                 async->start = md->pending_start;
934                 async->size = md->pending_size;
935                 async->bufsize = async->size;
936                 async->buffer = malloc(async->bufsize);
937                 if (!async->buffer) {
938                         free(async);
939                         return -ENOMEM;
940                 }
941                 offset = 0;
942                 start = async->start;
943                 size = async->size;
944
945                 if (md->data) {
946                         ret = read_data_extent(md, async);
947                         if (ret) {
948                                 free(async->buffer);
949                                 free(async);
950                                 return ret;
951                         }
952                 }
953
954                 /*
955                  * Balance can make the mapping not cover the super block, so
956                  * just copy directly from one of the devices.
957                  */
958                 if (start == BTRFS_SUPER_INFO_OFFSET) {
959                         int fd = get_dev_fd(md->root);
960
961                         ret = pread64(fd, async->buffer, size, start);
962                         if (ret < size) {
963                                 free(async->buffer);
964                                 free(async);
965                                 error("unable to read superblock at %llu: %s",
966                                                 (unsigned long long)start,
967                                                 strerror(errno));
968                                 return -errno;
969                         }
970                         size = 0;
971                         ret = 0;
972                 }
973
974                 while (!md->data && size > 0) {
975                         u64 this_read = min(blocksize, size);
976                         eb = read_tree_block(md->root, start, this_read, 0);
977                         if (!extent_buffer_uptodate(eb)) {
978                                 free(async->buffer);
979                                 free(async);
980                                 error("unable to read metadata block %llu",
981                                         (unsigned long long)start);
982                                 return -EIO;
983                         }
984                         copy_buffer(md, async->buffer + offset, eb);
985                         free_extent_buffer(eb);
986                         start += this_read;
987                         offset += this_read;
988                         size -= this_read;
989                 }
990
991                 md->pending_start = (u64)-1;
992                 md->pending_size = 0;
993         } else if (!done) {
994                 return 0;
995         }
996
997         pthread_mutex_lock(&md->mutex);
998         if (async) {
999                 list_add_tail(&async->ordered, &md->ordered);
1000                 md->num_items++;
1001                 if (md->compress_level > 0) {
1002                         list_add_tail(&async->list, &md->list);
1003                         pthread_cond_signal(&md->cond);
1004                 } else {
1005                         md->num_ready++;
1006                 }
1007         }
1008         if (md->num_items >= ITEMS_PER_CLUSTER || done) {
1009                 ret = write_buffers(md, &start);
1010                 if (ret)
1011                         error("unable to write buffers: %s", strerror(-ret));
1012                 else
1013                         meta_cluster_init(md, start);
1014         }
1015         pthread_mutex_unlock(&md->mutex);
1016         return ret;
1017 }
1018
1019 static int add_extent(u64 start, u64 size, struct metadump_struct *md,
1020                       int data)
1021 {
1022         int ret;
1023         if (md->data != data ||
1024             md->pending_size + size > MAX_PENDING_SIZE ||
1025             md->pending_start + md->pending_size != start) {
1026                 ret = flush_pending(md, 0);
1027                 if (ret)
1028                         return ret;
1029                 md->pending_start = start;
1030         }
1031         readahead_tree_block(md->root, start, size, 0);
1032         md->pending_size += size;
1033         md->data = data;
1034         return 0;
1035 }
1036
1037 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Decide whether the extent at @bytenr is a tree block by scanning the
 * old-format EXTENT_REF_V0 items that follow its extent item.
 *
 * @extent_root: extent tree root
 * @path:        positioned at the extent item for @bytenr; the slot is
 *               advanced past it while scanning the following ref items
 * @bytenr:      logical address of the extent being classified
 *
 * Returns 1 when a ref owner below BTRFS_FIRST_FREE_OBJECTID is found
 * (i.e. the extent belongs to an internal tree), 0 otherwise, negative
 * errno on error.
 */
static int is_tree_block(struct btrfs_root *extent_root,
			 struct btrfs_path *path, u64 bytenr)
{
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 ref_objectid;
	int ret;

	leaf = path->nodes[0];
	while (1) {
		struct btrfs_extent_ref_v0 *ref_item;
		path->slots[0]++;
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				return ret;
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		/* Walked past the items belonging to this extent */
		if (key.objectid != bytenr)
			break;
		if (key.type != BTRFS_EXTENT_REF_V0_KEY)
			continue;
		ref_item = btrfs_item_ptr(leaf, path->slots[0],
					  struct btrfs_extent_ref_v0);
		ref_objectid = btrfs_ref_objectid_v0(leaf, ref_item);
		/* Internal tree objectids are below FIRST_FREE_OBJECTID */
		if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID)
			return 1;
		break;
	}
	return 0;
}
1072 #endif
1073
/*
 * Recursively queue @eb and everything below it for dumping.
 *
 * @root:      root used for reading child blocks
 * @eb:        current tree block (already read)
 * @metadump:  dump context the extents are added to
 * @root_tree: non-zero when @eb belongs to a tree of root items; in that
 *             case leaf items of type BTRFS_ROOT_ITEM_KEY are followed
 *             into the trees they point at
 *
 * Returns 0 on success, negative errno on failure.
 */
static int copy_tree_blocks(struct btrfs_root *root, struct extent_buffer *eb,
			    struct metadump_struct *metadump, int root_tree)
{
	struct extent_buffer *tmp;
	struct btrfs_root_item *ri;
	struct btrfs_key key;
	u64 bytenr;
	int level;
	int nritems = 0;
	int i = 0;
	int ret;

	ret = add_extent(btrfs_header_bytenr(eb), root->fs_info->nodesize,
			 metadump, 0);
	if (ret) {
		error("unable to add metadata block %llu: %d",
				btrfs_header_bytenr(eb), ret);
		return ret;
	}

	/* A plain leaf has no children to descend into */
	if (btrfs_header_level(eb) == 0 && !root_tree)
		return 0;

	level = btrfs_header_level(eb);
	nritems = btrfs_header_nritems(eb);
	for (i = 0; i < nritems; i++) {
		if (level == 0) {
			/* Root-tree leaf: follow each root item's tree */
			btrfs_item_key_to_cpu(eb, &key, i);
			if (key.type != BTRFS_ROOT_ITEM_KEY)
				continue;
			ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
			bytenr = btrfs_disk_root_bytenr(eb, ri);
			tmp = read_tree_block(root, bytenr,
					      root->fs_info->nodesize, 0);
			/*
			 * NOTE(review): tmp is not freed on this error path;
			 * looks like a leak, but whether free_extent_buffer()
			 * is safe on the failure value returned by
			 * read_tree_block() here needs confirming.
			 */
			if (!extent_buffer_uptodate(tmp)) {
				error("unable to read log root block");
				return -EIO;
			}
			ret = copy_tree_blocks(root, tmp, metadump, 0);
			free_extent_buffer(tmp);
			if (ret)
				return ret;
		} else {
			/* Interior node: recurse into each child block */
			bytenr = btrfs_node_blockptr(eb, i);
			tmp = read_tree_block(root, bytenr,
					      root->fs_info->nodesize, 0);
			/* NOTE(review): same possible tmp leak as above */
			if (!extent_buffer_uptodate(tmp)) {
				error("unable to read log root block");
				return -EIO;
			}
			ret = copy_tree_blocks(root, tmp, metadump, root_tree);
			free_extent_buffer(tmp);
			if (ret)
				return ret;
		}
	}

	return 0;
}
1133
1134 static int copy_log_trees(struct btrfs_root *root,
1135                           struct metadump_struct *metadump)
1136 {
1137         u64 blocknr = btrfs_super_log_root(root->fs_info->super_copy);
1138
1139         if (blocknr == 0)
1140                 return 0;
1141
1142         if (!root->fs_info->log_root_tree ||
1143             !root->fs_info->log_root_tree->node) {
1144                 error("unable to copy tree log, it has not been setup");
1145                 return -EIO;
1146         }
1147
1148         return copy_tree_blocks(root, root->fs_info->log_root_tree->node,
1149                                 metadump, 1);
1150 }
1151
/*
 * Queue the file extents backing the free space cache for dumping as
 * data extents.
 *
 * The space cache inodes live in the tree root, so the search runs over
 * root->fs_info->tree_root regardless of the @root passed in.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int copy_space_cache(struct btrfs_root *root,
			    struct metadump_struct *metadump,
			    struct btrfs_path *path)
{
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 bytenr, num_bytes;
	int ret;

	root = root->fs_info->tree_root;

	/* Start from the first EXTENT_DATA item in the tree root */
	key.objectid = 0;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0) {
		error("free space inode not found: %d", ret);
		return ret;
	}

	leaf = path->nodes[0];

	while (1) {
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				error("cannot go to next leaf %d", ret);
				return ret;
			}
			/* ret > 0: no more leaves, done */
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.type != BTRFS_EXTENT_DATA_KEY) {
			path->slots[0]++;
			continue;
		}

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		/* Only regular extents carry on-disk data to copy */
		if (btrfs_file_extent_type(leaf, fi) !=
		    BTRFS_FILE_EXTENT_REG) {
			path->slots[0]++;
			continue;
		}

		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
		/* data=1: dump the extent contents, not just metadata */
		ret = add_extent(bytenr, num_bytes, metadump, 1);
		if (ret) {
			error("unable to add space cache blocks %d", ret);
			btrfs_release_path(path);
			return ret;
		}
		path->slots[0]++;
	}

	return 0;
}
1215
/*
 * Walk the extent tree and queue every tree block extent for dumping.
 *
 * Scans all EXTENT_ITEM/METADATA_ITEM entries starting just past the
 * primary superblock, adding the ones flagged as tree blocks (or, for
 * old V0-format items, the ones whose refs identify a tree) via
 * add_extent().
 *
 * Returns 0 on success, negative errno on failure.
 */
static int copy_from_extent_tree(struct metadump_struct *metadump,
				 struct btrfs_path *path)
{
	struct btrfs_root *extent_root;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_key key;
	u64 bytenr;
	u64 num_bytes;
	int ret;

	extent_root = metadump->root->fs_info->extent_root;
	/* Skip the superblock itself; it is added separately by the caller */
	bytenr = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0) {
		error("extent root not found: %d", ret);
		return ret;
	}
	ret = 0;

	leaf = path->nodes[0];

	while (1) {
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0) {
				error("cannot go to next leaf %d", ret);
				break;
			}
			/* ret > 0: walked off the last leaf, done */
			if (ret > 0) {
				ret = 0;
				break;
			}
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		/* Skip anything before our cursor and non-extent items */
		if (key.objectid < bytenr ||
		    (key.type != BTRFS_EXTENT_ITEM_KEY &&
		     key.type != BTRFS_METADATA_ITEM_KEY)) {
			path->slots[0]++;
			continue;
		}

		bytenr = key.objectid;
		if (key.type == BTRFS_METADATA_ITEM_KEY) {
			/* METADATA_ITEM offset is a level, not a length */
			num_bytes = extent_root->fs_info->nodesize;
		} else {
			num_bytes = key.offset;
		}

		if (num_bytes == 0) {
			error("extent length 0 at bytenr %llu key type %d",
					(unsigned long long)bytenr, key.type);
			ret = -EIO;
			break;
		}

		/* Current-format items embed flags telling us the type */
		if (btrfs_item_size_nr(leaf, path->slots[0]) > sizeof(*ei)) {
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_extent_item);
			if (btrfs_extent_flags(leaf, ei) &
			    BTRFS_EXTENT_FLAG_TREE_BLOCK) {
				ret = add_extent(bytenr, num_bytes, metadump,
						 0);
				if (ret) {
					error("unable to add block %llu: %d",
						(unsigned long long)bytenr, ret);
					break;
				}
			}
		} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
			/* V0 items need the ref items to classify the extent */
			ret = is_tree_block(extent_root, path, bytenr);
			if (ret < 0) {
				error("failed to check tree block %llu: %d",
					(unsigned long long)bytenr, ret);
				break;
			}

			if (ret) {
				ret = add_extent(bytenr, num_bytes, metadump,
						 0);
				if (ret) {
					error("unable to add block %llu: %d",
						(unsigned long long)bytenr, ret);
					break;
				}
			}
			ret = 0;
#else
			error(
	"either extent tree is corrupted or you haven't built with V0 support");
			ret = -EIO;
			break;
#endif
		}
		/* Advance the cursor past this extent */
		bytenr += num_bytes;
	}

	btrfs_release_path(path);

	return ret;
}
1324
1325 static int create_metadump(const char *input, FILE *out, int num_threads,
1326                            int compress_level, int sanitize, int walk_trees)
1327 {
1328         struct btrfs_root *root;
1329         struct btrfs_path path;
1330         struct metadump_struct metadump;
1331         int ret;
1332         int err = 0;
1333
1334         root = open_ctree(input, 0, 0);
1335         if (!root) {
1336                 error("open ctree failed");
1337                 return -EIO;
1338         }
1339
1340         ret = metadump_init(&metadump, root, out, num_threads,
1341                             compress_level, sanitize);
1342         if (ret) {
1343                 error("failed to initialize metadump: %d", ret);
1344                 close_ctree(root);
1345                 return ret;
1346         }
1347
1348         ret = add_extent(BTRFS_SUPER_INFO_OFFSET, BTRFS_SUPER_INFO_SIZE,
1349                         &metadump, 0);
1350         if (ret) {
1351                 error("unable to add metadata: %d", ret);
1352                 err = ret;
1353                 goto out;
1354         }
1355
1356         btrfs_init_path(&path);
1357
1358         if (walk_trees) {
1359                 ret = copy_tree_blocks(root, root->fs_info->chunk_root->node,
1360                                        &metadump, 1);
1361                 if (ret) {
1362                         err = ret;
1363                         goto out;
1364                 }
1365
1366                 ret = copy_tree_blocks(root, root->fs_info->tree_root->node,
1367                                        &metadump, 1);
1368                 if (ret) {
1369                         err = ret;
1370                         goto out;
1371                 }
1372         } else {
1373                 ret = copy_from_extent_tree(&metadump, &path);
1374                 if (ret) {
1375                         err = ret;
1376                         goto out;
1377                 }
1378         }
1379
1380         ret = copy_log_trees(root, &metadump);
1381         if (ret) {
1382                 err = ret;
1383                 goto out;
1384         }
1385
1386         ret = copy_space_cache(root, &metadump, &path);
1387 out:
1388         ret = flush_pending(&metadump, 1);
1389         if (ret) {
1390                 if (!err)
1391                         err = ret;
1392                 error("failed to flush pending data: %d", ret);
1393         }
1394
1395         metadump_destroy(&metadump, num_threads);
1396
1397         btrfs_release_path(&path);
1398         ret = close_ctree(root);
1399         return err ? err : ret;
1400 }
1401
/*
 * Rewrite an old-format restored superblock so the image mounts as a
 * single-device metadump: set the METADUMP flag and replace the system
 * chunk array with one chunk covering everything.
 */
static void update_super_old(u8 *buffer)
{
	struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
	struct btrfs_chunk *chunk;
	struct btrfs_disk_key *key;
	u32 sectorsize = btrfs_super_sectorsize(super);
	u64 flags = btrfs_super_flags(super);

	flags |= BTRFS_SUPER_FLAG_METADUMP;
	btrfs_set_super_flags(super, flags);

	/* The sys array holds a disk key immediately followed by a chunk */
	key = (struct btrfs_disk_key *)(super->sys_chunk_array);
	chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
				       sizeof(struct btrfs_disk_key));

	btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
	btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
	btrfs_set_disk_key_offset(key, 0);

	/* One catch-all SYSTEM chunk with a single stripe on this device */
	btrfs_set_stack_chunk_length(chunk, (u64)-1);
	btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
	btrfs_set_stack_chunk_stripe_len(chunk, BTRFS_STRIPE_LEN);
	btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
	btrfs_set_stack_chunk_io_align(chunk, sectorsize);
	btrfs_set_stack_chunk_io_width(chunk, sectorsize);
	btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
	btrfs_set_stack_chunk_num_stripes(chunk, 1);
	btrfs_set_stack_chunk_sub_stripes(chunk, 0);
	chunk->stripe.devid = super->dev_item.devid;
	btrfs_set_stack_stripe_offset(&chunk->stripe, 0);
	memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
	btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
	/* The superblock contents changed, so refresh its checksum */
	csum_block(buffer, BTRFS_SUPER_INFO_SIZE);
}
1436
/*
 * Rewrite a restored superblock's system chunk array in place so every
 * chunk points at this single device, using the logical->physical mapping
 * recorded in @mdres.  Non-DUP chunks are collapsed to one SYSTEM stripe;
 * DUP chunks keep their second stripe.  Sets the METADUMP_V2 flag and
 * re-checksums the block.
 *
 * Returns 0 on success, -EIO on a malformed sys array.
 */
static int update_super(struct mdrestore_struct *mdres, u8 *buffer)
{
	struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
	struct btrfs_chunk *chunk;
	struct btrfs_disk_key *disk_key;
	struct btrfs_key key;
	u64 flags = btrfs_super_flags(super);
	u32 new_array_size = 0;
	u32 array_size;
	u32 cur = 0;
	u8 *ptr, *write_ptr;
	int old_num_stripes;

	/*
	 * ptr reads the original (key, chunk) pairs; write_ptr compacts
	 * the rewritten pairs back into the same array.
	 */
	write_ptr = ptr = super->sys_chunk_array;
	array_size = btrfs_super_sys_array_size(super);

	while (cur < array_size) {
		disk_key = (struct btrfs_disk_key *)ptr;
		btrfs_disk_key_to_cpu(&key, disk_key);

		new_array_size += sizeof(*disk_key);
		memmove(write_ptr, ptr, sizeof(*disk_key));

		write_ptr += sizeof(*disk_key);
		ptr += sizeof(*disk_key);
		cur += sizeof(*disk_key);

		if (key.type == BTRFS_CHUNK_ITEM_KEY) {
			u64 type, physical, physical_dup, size = 0;

			/* Stripe count must be read before the memmove */
			chunk = (struct btrfs_chunk *)ptr;
			old_num_stripes = btrfs_stack_chunk_num_stripes(chunk);
			chunk = (struct btrfs_chunk *)write_ptr;

			memmove(write_ptr, ptr, sizeof(*chunk));
			btrfs_set_stack_chunk_sub_stripes(chunk, 0);
			type = btrfs_stack_chunk_type(chunk);
			if (type & BTRFS_BLOCK_GROUP_DUP) {
				/* DUP keeps its second stripe */
				new_array_size += sizeof(struct btrfs_stripe);
				write_ptr += sizeof(struct btrfs_stripe);
			} else {
				btrfs_set_stack_chunk_num_stripes(chunk, 1);
				btrfs_set_stack_chunk_type(chunk,
						BTRFS_BLOCK_GROUP_SYSTEM);
			}
			chunk->stripe.devid = super->dev_item.devid;
			physical = logical_to_physical(mdres, key.offset,
						       &size, &physical_dup);
			if (size != (u64)-1)
				btrfs_set_stack_stripe_offset(&chunk->stripe,
							      physical);
			memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid,
			       BTRFS_UUID_SIZE);
			new_array_size += sizeof(*chunk);
		} else {
			error("bogus key in the sys array %d", key.type);
			return -EIO;
		}
		write_ptr += sizeof(*chunk);
		/* Advance the read cursor by the chunk's original size */
		ptr += btrfs_chunk_item_size(old_num_stripes);
		cur += btrfs_chunk_item_size(old_num_stripes);
	}

	if (mdres->clear_space_cache)
		btrfs_set_super_cache_generation(super, 0);

	flags |= BTRFS_SUPER_FLAG_METADUMP_V2;
	btrfs_set_super_flags(super, flags);
	btrfs_set_super_sys_array_size(super, new_array_size);
	btrfs_set_super_num_devices(super, 1);
	/* The superblock contents changed, so refresh its checksum */
	csum_block(buffer, BTRFS_SUPER_INFO_SIZE);

	return 0;
}
1511
1512 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size)
1513 {
1514         struct extent_buffer *eb;
1515
1516         eb = calloc(1, sizeof(struct extent_buffer) + size);
1517         if (!eb)
1518                 return NULL;
1519
1520         eb->start = bytenr;
1521         eb->len = size;
1522         return eb;
1523 }
1524
/*
 * Shrink the item in @slot of leaf @eb to @new_size bytes, compacting
 * the leaf's data area.
 *
 * Item data grows downward from the end of the leaf, so shrinking an
 * item means shifting everything at lower offsets up by the size
 * difference and bumping the affected items' offsets accordingly.
 */
static void truncate_item(struct extent_buffer *eb, int slot, u32 new_size)
{
	struct btrfs_item *item;
	u32 nritems;
	u32 old_size;
	u32 old_data_start;
	u32 size_diff;
	u32 data_end;
	int i;

	old_size = btrfs_item_size_nr(eb, slot);
	if (old_size == new_size)
		return;

	nritems = btrfs_header_nritems(eb);
	/* Data of the last item starts lowest in the leaf */
	data_end = btrfs_item_offset_nr(eb, nritems - 1);

	old_data_start = btrfs_item_offset_nr(eb, slot);
	size_diff = old_size - new_size;

	/* Items at and after @slot have data below it; raise their offsets */
	for (i = slot; i < nritems; i++) {
		u32 ioff;
		item = btrfs_item_nr(i);
		ioff = btrfs_item_offset(eb, item);
		btrfs_set_item_offset(eb, item, ioff + size_diff);
	}

	/* Shift the data below the truncated item up by size_diff */
	memmove_extent_buffer(eb, btrfs_leaf_data(eb) + data_end + size_diff,
			      btrfs_leaf_data(eb) + data_end,
			      old_data_start + new_size - data_end);
	item = btrfs_item_nr(slot);
	btrfs_set_item_size(eb, item, new_size);
}
1558
/*
 * Rewrite the chunk items in a restored metadata cluster so their stripes
 * point at this single target device, and re-checksum the affected nodes.
 *
 * @mdres:  restore context (fsid, devid, uuid, nodesize, chunk mapping)
 * @async:  the cluster being restored; async->start is its first bytenr
 * @buffer: the cluster's decompressed contents, modified in place
 * @size:   length of @buffer in bytes
 *
 * Returns 0 on success (including when nothing needed fixing),
 * -ENOMEM on allocation failure.
 */
static int fixup_chunk_tree_block(struct mdrestore_struct *mdres,
				  struct async_work *async, u8 *buffer,
				  size_t size)
{
	struct extent_buffer *eb;
	size_t size_left = size;
	u64 bytenr = async->start;
	int i;

	/* Not a whole number of nodes: nothing here can be a tree block */
	if (size_left % mdres->nodesize)
		return 0;

	eb = alloc_dummy_eb(bytenr, mdres->nodesize);
	if (!eb)
		return -ENOMEM;

	/* Examine the buffer one node-sized block at a time */
	while (size_left) {
		eb->start = bytenr;
		memcpy(eb->data, buffer, mdres->nodesize);

		/* Header bytenr/fsid mismatch: not a metadata block; stop */
		if (btrfs_header_bytenr(eb) != bytenr)
			break;
		if (memcmp(mdres->fsid,
			   eb->data + offsetof(struct btrfs_header, fsid),
			   BTRFS_FSID_SIZE))
			break;

		/* Only chunk tree leaves contain chunk items to rewrite */
		if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID)
			goto next;

		if (btrfs_header_level(eb) != 0)
			goto next;

		for (i = 0; i < btrfs_header_nritems(eb); i++) {
			struct btrfs_chunk *chunk;
			struct btrfs_key key;
			u64 type, physical, physical_dup, size = (u64)-1;

			btrfs_item_key_to_cpu(eb, &key, i);
			if (key.type != BTRFS_CHUNK_ITEM_KEY)
				continue;

			size = 0;
			physical = logical_to_physical(mdres, key.offset,
						       &size, &physical_dup);

			/* Collapse to a single stripe unless this was DUP */
			if (!physical_dup)
				truncate_item(eb, i, sizeof(*chunk));
			chunk = btrfs_item_ptr(eb, i, struct btrfs_chunk);


			/* Zero out the RAID profile */
			type = btrfs_chunk_type(eb, chunk);
			type &= (BTRFS_BLOCK_GROUP_DATA |
				 BTRFS_BLOCK_GROUP_SYSTEM |
				 BTRFS_BLOCK_GROUP_METADATA |
				 BTRFS_BLOCK_GROUP_DUP);
			btrfs_set_chunk_type(eb, chunk, type);

			if (!physical_dup)
				btrfs_set_chunk_num_stripes(eb, chunk, 1);
			btrfs_set_chunk_sub_stripes(eb, chunk, 0);
			btrfs_set_stripe_devid_nr(eb, chunk, 0, mdres->devid);
			if (size != (u64)-1)
				btrfs_set_stripe_offset_nr(eb, chunk, 0,
							   physical);
			/* update stripe 2 offset */
			if (physical_dup)
				btrfs_set_stripe_offset_nr(eb, chunk, 1,
							   physical_dup);

			write_extent_buffer(eb, mdres->uuid,
					(unsigned long)btrfs_stripe_dev_uuid_nr(
						chunk, 0),
					BTRFS_UUID_SIZE);
		}
		/* Copy the fixed node back and refresh its checksum */
		memcpy(buffer, eb->data, eb->len);
		csum_block(buffer, eb->len);
next:
		size_left -= mdres->nodesize;
		buffer += mdres->nodesize;
		bytenr += mdres->nodesize;
	}

	free(eb);
	return 0;
}
1646
/*
 * Write copies of the primary superblock in @buf to all backup super
 * mirror offsets that fit within the target's size.  Each copy gets its
 * bytenr field and checksum updated first.  Failures are reported but
 * not propagated (best effort).
 */
static void write_backup_supers(int fd, u8 *buf)
{
	struct btrfs_super_block *super = (struct btrfs_super_block *)buf;
	struct stat st;
	u64 size;
	u64 bytenr;
	int i;
	int ret;

	if (fstat(fd, &st)) {
		error(
	"cannot stat restore point, won't be able to write backup supers: %s",
			strerror(errno));
		return;
	}

	size = btrfs_device_size(fd, &st);

	/* Mirror 0 is the primary super, already written by the caller */
	for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		/* Skip mirrors that would land past the end of the device */
		if (bytenr + BTRFS_SUPER_INFO_SIZE > size)
			break;
		btrfs_set_super_bytenr(super, bytenr);
		/* bytenr changed, so the checksum must be recomputed */
		csum_block(buf, BTRFS_SUPER_INFO_SIZE);
		ret = pwrite64(fd, buf, BTRFS_SUPER_INFO_SIZE, bytenr);
		if (ret < BTRFS_SUPER_INFO_SIZE) {
			if (ret < 0)
				error(
				"problem writing out backup super block %d: %s",
						i, strerror(errno));
			else
				error("short write writing out backup super block");
			break;
		}
	}
}
1683
/*
 * Restore worker thread: pull queued items off mdres->list, decompress them
 * if the image is zlib-compressed, fix up the super block / chunk tree
 * blocks as needed and write the data to the output device (or through
 * write_data_to_disk() in the fixup_offset case).
 *
 * Runs until mdres->done is set and the queue is drained.  Errors are
 * reported through mdres->error using the file's negative-errno convention.
 */
static void *restore_worker(void *data)
{
	struct mdrestore_struct *mdres = (struct mdrestore_struct *)data;
	struct async_work *async;
	size_t size;
	u8 *buffer;
	u8 *outbuf;
	int outfd;
	int ret;
	int compress_size = MAX_PENDING_SIZE * 4;

	outfd = fileno(mdres->out);
	buffer = malloc(compress_size);
	if (!buffer) {
		error("not enough memory for restore worker buffer");
		pthread_mutex_lock(&mdres->mutex);
		if (!mdres->error)
			mdres->error = -ENOMEM;
		pthread_mutex_unlock(&mdres->mutex);
		pthread_exit(NULL);
	}

	while (1) {
		u64 bytenr, physical_dup;
		off_t offset = 0;
		int err = 0;

		/*
		 * Wait until the super block has been processed (nodesize is
		 * set) and there is work queued, or we are told to finish.
		 */
		pthread_mutex_lock(&mdres->mutex);
		while (!mdres->nodesize || list_empty(&mdres->list)) {
			if (mdres->done) {
				pthread_mutex_unlock(&mdres->mutex);
				goto out;
			}
			pthread_cond_wait(&mdres->cond, &mdres->mutex);
		}
		async = list_entry(mdres->list.next, struct async_work, list);
		list_del_init(&async->list);
		pthread_mutex_unlock(&mdres->mutex);

		if (mdres->compress_method == COMPRESS_ZLIB) {
			size = compress_size;
			ret = uncompress(buffer, (unsigned long *)&size,
					 async->buffer, async->bufsize);
			if (ret != Z_OK) {
				error("decompression failed with %d", ret);
				err = -EIO;
			}
			outbuf = buffer;
		} else {
			outbuf = async->buffer;
			size = async->bufsize;
		}

		if (!mdres->multi_devices) {
			if (async->start == BTRFS_SUPER_INFO_OFFSET) {
				if (mdres->old_restore) {
					update_super_old(outbuf);
				} else {
					ret = update_super(mdres, outbuf);
					if (ret)
						err = ret;
				}
			} else if (!mdres->old_restore) {
				ret = fixup_chunk_tree_block(mdres, async, outbuf, size);
				if (ret)
					err = ret;
			}
		}

		if (!mdres->fixup_offset) {
			while (size) {
				u64 chunk_size = size;
				physical_dup = 0;
				if (!mdres->multi_devices && !mdres->old_restore)
					bytenr = logical_to_physical(mdres,
						     async->start + offset,
						     &chunk_size,
						     &physical_dup);
				else
					bytenr = async->start + offset;

				ret = pwrite64(outfd, outbuf + offset, chunk_size,
					       bytenr);
				if (ret != chunk_size)
					goto error;

				/* Write the second copy for DUP chunks */
				if (physical_dup)
					ret = pwrite64(outfd, outbuf + offset,
						       chunk_size,
						       physical_dup);
				if (ret != chunk_size)
					goto error;

				size -= chunk_size;
				offset += chunk_size;
				continue;

error:
				if (ret < 0) {
					/*
					 * Save errno before error() which may
					 * clobber it; keep the file's
					 * negative-errno convention.
					 */
					err = -errno;
					error("unable to write to device: %s",
							strerror(-err));
				} else {
					error("short write");
					err = -EIO;
				}
				/*
				 * Nothing was consumed on this pass; retrying
				 * the same range would loop forever, so give
				 * up on this item.
				 */
				break;
			}
		} else if (async->start != BTRFS_SUPER_INFO_OFFSET) {
			ret = write_data_to_disk(mdres->info, outbuf, async->start, size, 0);
			if (ret) {
				error("failed to write data");
				exit(1);
			}
		}

		/* backup super blocks are already there at fixup_offset stage */
		if (!mdres->multi_devices && async->start == BTRFS_SUPER_INFO_OFFSET)
			write_backup_supers(outfd, outbuf);

		pthread_mutex_lock(&mdres->mutex);
		if (err && !mdres->error)
			mdres->error = err;
		mdres->num_items--;
		pthread_mutex_unlock(&mdres->mutex);

		free(async->buffer);
		free(async);
	}
out:
	free(buffer);
	pthread_exit(NULL);
}
1817
1818 static void mdrestore_destroy(struct mdrestore_struct *mdres, int num_threads)
1819 {
1820         struct rb_node *n;
1821         int i;
1822
1823         while ((n = rb_first(&mdres->chunk_tree))) {
1824                 struct fs_chunk *entry;
1825
1826                 entry = rb_entry(n, struct fs_chunk, l);
1827                 rb_erase(n, &mdres->chunk_tree);
1828                 rb_erase(&entry->p, &mdres->physical_tree);
1829                 free(entry);
1830         }
1831         pthread_mutex_lock(&mdres->mutex);
1832         mdres->done = 1;
1833         pthread_cond_broadcast(&mdres->cond);
1834         pthread_mutex_unlock(&mdres->mutex);
1835
1836         for (i = 0; i < num_threads; i++)
1837                 pthread_join(mdres->threads[i], NULL);
1838
1839         pthread_cond_destroy(&mdres->cond);
1840         pthread_mutex_destroy(&mdres->mutex);
1841 }
1842
1843 static int mdrestore_init(struct mdrestore_struct *mdres,
1844                           FILE *in, FILE *out, int old_restore,
1845                           int num_threads, int fixup_offset,
1846                           struct btrfs_fs_info *info, int multi_devices)
1847 {
1848         int i, ret = 0;
1849
1850         memset(mdres, 0, sizeof(*mdres));
1851         pthread_cond_init(&mdres->cond, NULL);
1852         pthread_mutex_init(&mdres->mutex, NULL);
1853         INIT_LIST_HEAD(&mdres->list);
1854         INIT_LIST_HEAD(&mdres->overlapping_chunks);
1855         mdres->in = in;
1856         mdres->out = out;
1857         mdres->old_restore = old_restore;
1858         mdres->chunk_tree.rb_node = NULL;
1859         mdres->fixup_offset = fixup_offset;
1860         mdres->info = info;
1861         mdres->multi_devices = multi_devices;
1862         mdres->clear_space_cache = 0;
1863         mdres->last_physical_offset = 0;
1864         mdres->alloced_chunks = 0;
1865
1866         if (!num_threads)
1867                 return 0;
1868
1869         mdres->num_threads = num_threads;
1870         for (i = 0; i < num_threads; i++) {
1871                 ret = pthread_create(&mdres->threads[i], NULL, restore_worker,
1872                                      mdres);
1873                 if (ret) {
1874                         /* pthread_create returns errno directly */
1875                         ret = -ret;
1876                         break;
1877                 }
1878         }
1879         if (ret)
1880                 mdrestore_destroy(mdres, i + 1);
1881         return ret;
1882 }
1883
/*
 * Initialize per-restore metadata (nodesize, fsid, device uuid, devid)
 * from the super block item, decompressing it first if the image is
 * zlib-compressed.
 *
 * Callers serialize on mdres->mutex; mdres->nodesize != 0 marks the state
 * as already initialized.
 *
 * Returns 0 on success (including the already-initialized case), negative
 * errno on failure.
 */
static int fill_mdres_info(struct mdrestore_struct *mdres,
			   struct async_work *async)
{
	struct btrfs_super_block *super;
	u8 *buffer = NULL;
	u8 *outbuf;
	int ret;

	/* We've already been initialized */
	if (mdres->nodesize)
		return 0;

	if (mdres->compress_method == COMPRESS_ZLIB) {
		size_t size = MAX_PENDING_SIZE * 2;

		buffer = malloc(MAX_PENDING_SIZE * 2);
		if (!buffer)
			return -ENOMEM;
		ret = uncompress(buffer, (unsigned long *)&size,
				 async->buffer, async->bufsize);
		if (ret != Z_OK) {
			error("decompression failed with %d", ret);
			free(buffer);
			return -EIO;
		}
		outbuf = buffer;
	} else {
		outbuf = async->buffer;
	}

	super = (struct btrfs_super_block *)outbuf;
	mdres->nodesize = btrfs_super_nodesize(super);
	memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
	memcpy(mdres->uuid, super->dev_item.uuid,
		       BTRFS_UUID_SIZE);
	mdres->devid = le64_to_cpu(super->dev_item.devid);
	/* free(NULL) is a no-op in the uncompressed case */
	free(buffer);
	return 0;
}
1923
1924 static int add_cluster(struct meta_cluster *cluster,
1925                        struct mdrestore_struct *mdres, u64 *next)
1926 {
1927         struct meta_cluster_item *item;
1928         struct meta_cluster_header *header = &cluster->header;
1929         struct async_work *async;
1930         u64 bytenr;
1931         u32 i, nritems;
1932         int ret;
1933
1934         mdres->compress_method = header->compress;
1935
1936         bytenr = le64_to_cpu(header->bytenr) + BLOCK_SIZE;
1937         nritems = le32_to_cpu(header->nritems);
1938         for (i = 0; i < nritems; i++) {
1939                 item = &cluster->items[i];
1940                 async = calloc(1, sizeof(*async));
1941                 if (!async) {
1942                         error("not enough memory for async data");
1943                         return -ENOMEM;
1944                 }
1945                 async->start = le64_to_cpu(item->bytenr);
1946                 async->bufsize = le32_to_cpu(item->size);
1947                 async->buffer = malloc(async->bufsize);
1948                 if (!async->buffer) {
1949                         error("not enough memory for async buffer");
1950                         free(async);
1951                         return -ENOMEM;
1952                 }
1953                 ret = fread(async->buffer, async->bufsize, 1, mdres->in);
1954                 if (ret != 1) {
1955                         error("unable to read buffer: %s", strerror(errno));
1956                         free(async->buffer);
1957                         free(async);
1958                         return -EIO;
1959                 }
1960                 bytenr += async->bufsize;
1961
1962                 pthread_mutex_lock(&mdres->mutex);
1963                 if (async->start == BTRFS_SUPER_INFO_OFFSET) {
1964                         ret = fill_mdres_info(mdres, async);
1965                         if (ret) {
1966                                 error("unable to set up restore state");
1967                                 pthread_mutex_unlock(&mdres->mutex);
1968                                 free(async->buffer);
1969                                 free(async);
1970                                 return ret;
1971                         }
1972                 }
1973                 list_add_tail(&async->list, &mdres->list);
1974                 mdres->num_items++;
1975                 pthread_cond_signal(&mdres->cond);
1976                 pthread_mutex_unlock(&mdres->mutex);
1977         }
1978         if (bytenr & BLOCK_MASK) {
1979                 char buffer[BLOCK_MASK];
1980                 size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);
1981
1982                 bytenr += size;
1983                 ret = fread(buffer, size, 1, mdres->in);
1984                 if (ret != 1) {
1985                         error("failed to read buffer: %s", strerror(errno));
1986                         return -EIO;
1987                 }
1988         }
1989         *next = bytenr;
1990         return 0;
1991 }
1992
1993 static int wait_for_worker(struct mdrestore_struct *mdres)
1994 {
1995         int ret = 0;
1996
1997         pthread_mutex_lock(&mdres->mutex);
1998         ret = mdres->error;
1999         while (!ret && mdres->num_items > 0) {
2000                 struct timespec ts = {
2001                         .tv_sec = 0,
2002                         .tv_nsec = 10000000,
2003                 };
2004                 pthread_mutex_unlock(&mdres->mutex);
2005                 nanosleep(&ts, NULL);
2006                 pthread_mutex_lock(&mdres->mutex);
2007                 ret = mdres->error;
2008         }
2009         pthread_mutex_unlock(&mdres->mutex);
2010         return ret;
2011 }
2012
/*
 * Parse the chunk tree block at logical address 'bytenr' out of 'buffer'
 * (the decompressed data of one image item starting at 'item_bytenr') and
 * record every chunk item found into mdres' chunk and physical mapping
 * trees.  For internal nodes, each child block is looked up recursively
 * via search_for_chunk_blocks() using 'cluster_bytenr' as the starting
 * image offset.
 *
 * NOTE(review): 'bufsize' is not referenced by the current implementation;
 * the caller guarantees the block lies within the buffer.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int read_chunk_block(struct mdrestore_struct *mdres, u8 *buffer,
			    u64 bytenr, u64 item_bytenr, u32 bufsize,
			    u64 cluster_bytenr)
{
	struct extent_buffer *eb;
	int ret = 0;
	int i;

	eb = alloc_dummy_eb(bytenr, mdres->nodesize);
	if (!eb) {
		ret = -ENOMEM;
		goto out;
	}

	/* Step through the item's data in nodesize units to the target block */
	while (item_bytenr != bytenr) {
		buffer += mdres->nodesize;
		item_bytenr += mdres->nodesize;
	}

	memcpy(eb->data, buffer, mdres->nodesize);
	/* Sanity check: the header must claim the address we searched for */
	if (btrfs_header_bytenr(eb) != bytenr) {
		error("eb bytenr does not match found bytenr: %llu != %llu",
				(unsigned long long)btrfs_header_bytenr(eb),
				(unsigned long long)bytenr);
		ret = -EIO;
		goto out;
	}

	/* ... belong to the filesystem we are restoring ... */
	if (memcmp(mdres->fsid, eb->data + offsetof(struct btrfs_header, fsid),
		   BTRFS_FSID_SIZE)) {
		error("filesystem UUID of eb %llu does not match",
				(unsigned long long)bytenr);
		ret = -EIO;
		goto out;
	}

	/* ... and be owned by the chunk tree */
	if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID) {
		error("wrong eb %llu owner %llu",
				(unsigned long long)bytenr,
				(unsigned long long)btrfs_header_owner(eb));
		ret = -EIO;
		goto out;
	}

	for (i = 0; i < btrfs_header_nritems(eb); i++) {
		struct btrfs_chunk *chunk;
		struct fs_chunk *fs_chunk;
		struct btrfs_key key;
		u64 type;

		/* Internal node: recurse into each child block */
		if (btrfs_header_level(eb)) {
			u64 blockptr = btrfs_node_blockptr(eb, i);

			ret = search_for_chunk_blocks(mdres, blockptr,
						      cluster_bytenr);
			if (ret)
				break;
			continue;
		}

		/* Yay a leaf!  We loves leafs! */
		btrfs_item_key_to_cpu(eb, &key, i);
		if (key.type != BTRFS_CHUNK_ITEM_KEY)
			continue;

		fs_chunk = malloc(sizeof(struct fs_chunk));
		if (!fs_chunk) {
			error("not enough memory to allocate chunk");
			ret = -ENOMEM;
			break;
		}
		memset(fs_chunk, 0, sizeof(*fs_chunk));
		chunk = btrfs_item_ptr(eb, i, struct btrfs_chunk);

		fs_chunk->logical = key.offset;
		fs_chunk->physical = btrfs_stripe_offset_nr(eb, chunk, 0);
		fs_chunk->bytes = btrfs_chunk_length(eb, chunk);
		INIT_LIST_HEAD(&fs_chunk->list);
		/*
		 * Chunks whose physical range collides with an existing one
		 * are set aside for remap_overlapping_chunks() instead of
		 * being inserted into the physical tree now.
		 */
		if (tree_search(&mdres->physical_tree, &fs_chunk->p,
				physical_cmp, 1) != NULL)
			list_add(&fs_chunk->list, &mdres->overlapping_chunks);
		else
			tree_insert(&mdres->physical_tree, &fs_chunk->p,
				    physical_cmp);

		/* DUP chunks carry a second physical copy in stripe 1 */
		type = btrfs_chunk_type(eb, chunk);
		if (type & BTRFS_BLOCK_GROUP_DUP) {
			fs_chunk->physical_dup =
					btrfs_stripe_offset_nr(eb, chunk, 1);
		}

		/* Track the highest physical end seen across all chunks */
		if (fs_chunk->physical_dup + fs_chunk->bytes >
		    mdres->last_physical_offset)
			mdres->last_physical_offset = fs_chunk->physical_dup +
				fs_chunk->bytes;
		else if (fs_chunk->physical + fs_chunk->bytes >
		    mdres->last_physical_offset)
			mdres->last_physical_offset = fs_chunk->physical +
				fs_chunk->bytes;
		mdres->alloced_chunks += fs_chunk->bytes;
		/* in dup case, fs_chunk->bytes should add twice */
		if (fs_chunk->physical_dup)
			mdres->alloced_chunks += fs_chunk->bytes;
		tree_insert(&mdres->chunk_tree, &fs_chunk->l, chunk_cmp);
	}
out:
	free(eb);
	return ret;
}
2122
/* If you have to ask you aren't worthy */
/*
 * Scan the image for the cluster item containing the chunk tree block at
 * logical address 'search', starting at image offset 'cluster_bytenr',
 * and parse it via read_chunk_block().  On EOF the scan wraps around to
 * the beginning of the image once.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
				   u64 search, u64 cluster_bytenr)
{
	struct meta_cluster *cluster;
	struct meta_cluster_header *header;
	struct meta_cluster_item *item;
	u64 current_cluster = cluster_bytenr, bytenr;
	u64 item_bytenr;
	u32 bufsize, nritems, i;
	u32 max_size = MAX_PENDING_SIZE * 2;
	u8 *buffer, *tmp = NULL;
	int ret = 0;

	cluster = malloc(BLOCK_SIZE);
	if (!cluster) {
		error("not enough memory for cluster");
		return -ENOMEM;
	}

	buffer = malloc(max_size);
	if (!buffer) {
		error("not enough memory for buffer");
		free(cluster);
		return -ENOMEM;
	}

	/* Scratch space for the compressed data, only needed for zlib images */
	if (mdres->compress_method == COMPRESS_ZLIB) {
		tmp = malloc(max_size);
		if (!tmp) {
			error("not enough memory for buffer");
			free(cluster);
			free(buffer);
			return -ENOMEM;
		}
	}

	bytenr = current_cluster;
	while (1) {
		if (fseek(mdres->in, current_cluster, SEEK_SET)) {
			error("seek failed: %s", strerror(errno));
			ret = -EIO;
			break;
		}

		ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
		if (ret == 0) {
			if (cluster_bytenr != 0) {
				/* Hit EOF mid-image: restart from offset 0 once */
				cluster_bytenr = 0;
				current_cluster = 0;
				bytenr = 0;
				continue;
			}
			error(
	"unknown state after reading cluster at %llu, probably corrupted data",
					(unsigned long long)cluster_bytenr);
			ret = -EIO;
			break;
		} else if (ret < 0) {
			error("unable to read image at %llu: %s",
					(unsigned long long)cluster_bytenr,
					strerror(errno));
			break;
		}
		ret = 0;

		header = &cluster->header;
		if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
		    le64_to_cpu(header->bytenr) != current_cluster) {
			error("bad header in metadump image");
			ret = -EIO;
			break;
		}

		bytenr += BLOCK_SIZE;
		nritems = le32_to_cpu(header->nritems);
		for (i = 0; i < nritems; i++) {
			size_t size;

			item = &cluster->items[i];
			bufsize = le32_to_cpu(item->size);
			item_bytenr = le64_to_cpu(item->bytenr);

			if (bufsize > max_size) {
				error("item %u too big: %u > %u", i, bufsize,
						max_size);
				ret = -EIO;
				break;
			}

			if (mdres->compress_method == COMPRESS_ZLIB) {
				ret = fread(tmp, bufsize, 1, mdres->in);
				if (ret != 1) {
					error("read error: %s", strerror(errno));
					ret = -EIO;
					break;
				}

				size = max_size;
				ret = uncompress(buffer,
						 (unsigned long *)&size, tmp,
						 bufsize);
				if (ret != Z_OK) {
					error("decompression failed with %d",
							ret);
					ret = -EIO;
					break;
				}
			} else {
				ret = fread(buffer, bufsize, 1, mdres->in);
				if (ret != 1) {
					error("read error: %s",
							strerror(errno));
					ret = -EIO;
					break;
				}
				size = bufsize;
			}
			ret = 0;

			/* Does this item cover the block we are looking for? */
			if (item_bytenr <= search &&
			    item_bytenr + size > search) {
				ret = read_chunk_block(mdres, buffer, search,
						       item_bytenr, size,
						       current_cluster);
				if (!ret)
					ret = 1;
				break;
			}
			bytenr += bufsize;
		}
		if (ret) {
			/* ret > 0 means the block was found and parsed */
			if (ret > 0)
				ret = 0;
			break;
		}
		/* Clusters are BLOCK_SIZE aligned in the image */
		if (bytenr & BLOCK_MASK)
			bytenr += BLOCK_SIZE - (bytenr & BLOCK_MASK);
		current_cluster = bytenr;
	}

	free(tmp);
	free(buffer);
	free(cluster);
	return ret;
}
2269
/*
 * Find the super block item in the first cluster of the image, read the
 * basic filesystem information (nodesize, fsid, uuid, devid) from it and
 * rebuild the chunk mapping by scanning the image for the chunk tree.
 *
 * Returns 0 on success (also for non-seekable stdin input, where the scan
 * is skipped entirely), negative errno on failure.
 */
static int build_chunk_tree(struct mdrestore_struct *mdres,
			    struct meta_cluster *cluster)
{
	struct btrfs_super_block *super;
	struct meta_cluster_header *header;
	struct meta_cluster_item *item = NULL;
	u64 chunk_root_bytenr = 0;
	u32 i, nritems;
	u64 bytenr = 0;
	u8 *buffer;
	int ret;

	/* We can't seek with stdin so don't bother doing this */
	if (mdres->in == stdin)
		return 0;

	ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
	if (ret <= 0) {
		error("unable to read cluster: %s", strerror(errno));
		return -EIO;
	}
	ret = 0;

	header = &cluster->header;
	if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
	    le64_to_cpu(header->bytenr) != 0) {
		error("bad header in metadump image");
		return -EIO;
	}

	bytenr += BLOCK_SIZE;
	mdres->compress_method = header->compress;
	nritems = le32_to_cpu(header->nritems);

	/* Skip forward through the items until the super block is found */
	for (i = 0; i < nritems; i++) {
		item = &cluster->items[i];

		if (le64_to_cpu(item->bytenr) == BTRFS_SUPER_INFO_OFFSET)
			break;
		bytenr += le32_to_cpu(item->size);
		if (fseek(mdres->in, le32_to_cpu(item->size), SEEK_CUR)) {
			error("seek failed: %s", strerror(errno));
			return -EIO;
		}
	}

	/*
	 * With an empty cluster (nritems == 0) 'item' is still NULL, so it
	 * must not be dereferenced in the error message; print the offset
	 * we expected to find instead.
	 */
	if (!item || le64_to_cpu(item->bytenr) != BTRFS_SUPER_INFO_OFFSET) {
		error("did not find superblock at %llu",
				(unsigned long long)BTRFS_SUPER_INFO_OFFSET);
		return -EINVAL;
	}

	buffer = malloc(le32_to_cpu(item->size));
	if (!buffer) {
		error("not enough memory to allocate buffer");
		return -ENOMEM;
	}

	ret = fread(buffer, le32_to_cpu(item->size), 1, mdres->in);
	if (ret != 1) {
		error("unable to read buffer: %s", strerror(errno));
		free(buffer);
		return -EIO;
	}

	if (mdres->compress_method == COMPRESS_ZLIB) {
		size_t size = MAX_PENDING_SIZE * 2;
		u8 *tmp;

		tmp = malloc(MAX_PENDING_SIZE * 2);
		if (!tmp) {
			free(buffer);
			return -ENOMEM;
		}
		ret = uncompress(tmp, (unsigned long *)&size,
				 buffer, le32_to_cpu(item->size));
		if (ret != Z_OK) {
			error("decompression failed with %d", ret);
			free(buffer);
			free(tmp);
			return -EIO;
		}
		free(buffer);
		buffer = tmp;
	}

	pthread_mutex_lock(&mdres->mutex);
	super = (struct btrfs_super_block *)buffer;
	chunk_root_bytenr = btrfs_super_chunk_root(super);
	mdres->nodesize = btrfs_super_nodesize(super);
	memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
	memcpy(mdres->uuid, super->dev_item.uuid,
		       BTRFS_UUID_SIZE);
	mdres->devid = le64_to_cpu(super->dev_item.devid);
	free(buffer);
	pthread_mutex_unlock(&mdres->mutex);

	return search_for_chunk_blocks(mdres, chunk_root_bytenr, 0);
}
2368
2369 static int range_contains_super(u64 physical, u64 bytes)
2370 {
2371         u64 super_bytenr;
2372         int i;
2373
2374         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2375                 super_bytenr = btrfs_sb_offset(i);
2376                 if (super_bytenr >= physical &&
2377                     super_bytenr < physical + bytes)
2378                         return 1;
2379         }
2380
2381         return 0;
2382 }
2383
2384 static void remap_overlapping_chunks(struct mdrestore_struct *mdres)
2385 {
2386         struct fs_chunk *fs_chunk;
2387
2388         while (!list_empty(&mdres->overlapping_chunks)) {
2389                 fs_chunk = list_first_entry(&mdres->overlapping_chunks,
2390                                             struct fs_chunk, list);
2391                 list_del_init(&fs_chunk->list);
2392                 if (range_contains_super(fs_chunk->physical,
2393                                          fs_chunk->bytes)) {
2394                         warning(
2395 "remapping a chunk that had a super mirror inside of it, clearing space cache so we don't end up with corruption");
2396                         mdres->clear_space_cache = 1;
2397                 }
2398                 fs_chunk->physical = mdres->last_physical_offset;
2399                 tree_insert(&mdres->physical_tree, &fs_chunk->p, physical_cmp);
2400                 mdres->last_physical_offset += fs_chunk->bytes;
2401         }
2402 }
2403
2404 static int fixup_devices(struct btrfs_fs_info *fs_info,
2405                          struct mdrestore_struct *mdres, off_t dev_size)
2406 {
2407         struct btrfs_trans_handle *trans;
2408         struct btrfs_dev_item *dev_item;
2409         struct btrfs_path path;
2410         struct extent_buffer *leaf;
2411         struct btrfs_root *root = fs_info->chunk_root;
2412         struct btrfs_key key;
2413         u64 devid, cur_devid;
2414         int ret;
2415
2416         trans = btrfs_start_transaction(fs_info->tree_root, 1);
2417         if (IS_ERR(trans)) {
2418                 error("cannot starting transaction %ld", PTR_ERR(trans));
2419                 return PTR_ERR(trans);
2420         }
2421
2422         dev_item = &fs_info->super_copy->dev_item;
2423
2424         devid = btrfs_stack_device_id(dev_item);
2425
2426         btrfs_set_stack_device_total_bytes(dev_item, dev_size);
2427         btrfs_set_stack_device_bytes_used(dev_item, mdres->alloced_chunks);
2428
2429         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2430         key.type = BTRFS_DEV_ITEM_KEY;
2431         key.offset = 0;
2432
2433         btrfs_init_path(&path);
2434
2435 again:
2436         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
2437         if (ret < 0) {
2438                 error("search failed: %d", ret);
2439                 exit(1);
2440         }
2441
2442         while (1) {
2443                 leaf = path.nodes[0];
2444                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2445                         ret = btrfs_next_leaf(root, &path);
2446                         if (ret < 0) {
2447                                 error("cannot go to next leaf %d", ret);
2448                                 exit(1);
2449                         }
2450                         if (ret > 0) {
2451                                 ret = 0;
2452                                 break;
2453                         }
2454                         leaf = path.nodes[0];
2455                 }
2456
2457                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2458                 if (key.type > BTRFS_DEV_ITEM_KEY)
2459                         break;
2460                 if (key.type != BTRFS_DEV_ITEM_KEY) {
2461                         path.slots[0]++;
2462                         continue;
2463                 }
2464
2465                 dev_item = btrfs_item_ptr(leaf, path.slots[0],
2466                                           struct btrfs_dev_item);
2467                 cur_devid = btrfs_device_id(leaf, dev_item);
2468                 if (devid != cur_devid) {
2469                         ret = btrfs_del_item(trans, root, &path);
2470                         if (ret) {
2471                                 error("cannot delete item: %d", ret);
2472                                 exit(1);
2473                         }
2474                         btrfs_release_path(&path);
2475                         goto again;
2476                 }
2477
2478                 btrfs_set_device_total_bytes(leaf, dev_item, dev_size);
2479                 btrfs_set_device_bytes_used(leaf, dev_item,
2480                                             mdres->alloced_chunks);
2481                 btrfs_mark_buffer_dirty(leaf);
2482                 path.slots[0]++;
2483         }
2484
2485         btrfs_release_path(&path);
2486         ret = btrfs_commit_transaction(trans, fs_info->tree_root);
2487         if (ret) {
2488                 error("unable to commit transaction: %d", ret);
2489                 return ret;
2490         }
2491         return 0;
2492 }
2493
2494 static int restore_metadump(const char *input, FILE *out, int old_restore,
2495                             int num_threads, int fixup_offset,
2496                             const char *target, int multi_devices)
2497 {
2498         struct meta_cluster *cluster = NULL;
2499         struct meta_cluster_header *header;
2500         struct mdrestore_struct mdrestore;
2501         struct btrfs_fs_info *info = NULL;
2502         u64 bytenr = 0;
2503         FILE *in = NULL;
2504         int ret = 0;
2505
2506         if (!strcmp(input, "-")) {
2507                 in = stdin;
2508         } else {
2509                 in = fopen(input, "r");
2510                 if (!in) {
2511                         error("unable to open metadump image: %s",
2512                                         strerror(errno));
2513                         return 1;
2514                 }
2515         }
2516
2517         /* NOTE: open with write mode */
2518         if (fixup_offset) {
2519                 info = open_ctree_fs_info(target, 0, 0, 0,
2520                                           OPEN_CTREE_WRITES |
2521                                           OPEN_CTREE_RESTORE |
2522                                           OPEN_CTREE_PARTIAL);
2523                 if (!info) {
2524                         error("open ctree failed");
2525                         ret = -EIO;
2526                         goto failed_open;
2527                 }
2528         }
2529
2530         cluster = malloc(BLOCK_SIZE);
2531         if (!cluster) {
2532                 error("not enough memory for cluster");
2533                 ret = -ENOMEM;
2534                 goto failed_info;
2535         }
2536
2537         ret = mdrestore_init(&mdrestore, in, out, old_restore, num_threads,
2538                              fixup_offset, info, multi_devices);
2539         if (ret) {
2540                 error("failed to initialize metadata restore state: %d", ret);
2541                 goto failed_cluster;
2542         }
2543
2544         if (!multi_devices && !old_restore) {
2545                 ret = build_chunk_tree(&mdrestore, cluster);
2546                 if (ret)
2547                         goto out;
2548                 if (!list_empty(&mdrestore.overlapping_chunks))
2549                         remap_overlapping_chunks(&mdrestore);
2550         }
2551
2552         if (in != stdin && fseek(in, 0, SEEK_SET)) {
2553                 error("seek failed: %s", strerror(errno));
2554                 goto out;
2555         }
2556
2557         while (!mdrestore.error) {
2558                 ret = fread(cluster, BLOCK_SIZE, 1, in);
2559                 if (!ret)
2560                         break;
2561
2562                 header = &cluster->header;
2563                 if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
2564                     le64_to_cpu(header->bytenr) != bytenr) {
2565                         error("bad header in metadump image");
2566                         ret = -EIO;
2567                         break;
2568                 }
2569                 ret = add_cluster(cluster, &mdrestore, &bytenr);
2570                 if (ret) {
2571                         error("failed to add cluster: %d", ret);
2572                         break;
2573                 }
2574         }
2575         ret = wait_for_worker(&mdrestore);
2576
2577         if (!ret && !multi_devices && !old_restore) {
2578                 struct btrfs_root *root;
2579                 struct stat st;
2580
2581                 root = open_ctree_fd(fileno(out), target, 0,
2582                                           OPEN_CTREE_PARTIAL |
2583                                           OPEN_CTREE_WRITES |
2584                                           OPEN_CTREE_NO_DEVICES);
2585                 if (!root) {
2586                         error("open ctree failed in %s", target);
2587                         ret = -EIO;
2588                         goto out;
2589                 }
2590                 info = root->fs_info;
2591
2592                 if (stat(target, &st)) {
2593                         error("stat %s failed: %s", target, strerror(errno));
2594                         close_ctree(info->chunk_root);
2595                         free(cluster);
2596                         return 1;
2597                 }
2598
2599                 ret = fixup_devices(info, &mdrestore, st.st_size);
2600                 close_ctree(info->chunk_root);
2601                 if (ret)
2602                         goto out;
2603         }
2604 out:
2605         mdrestore_destroy(&mdrestore, num_threads);
2606 failed_cluster:
2607         free(cluster);
2608 failed_info:
2609         if (fixup_offset && info)
2610                 close_ctree(info->chunk_root);
2611 failed_open:
2612         if (in != stdin)
2613                 fclose(in);
2614         return ret;
2615 }
2616
2617 static int update_disk_super_on_device(struct btrfs_fs_info *info,
2618                                        const char *other_dev, u64 cur_devid)
2619 {
2620         struct btrfs_key key;
2621         struct extent_buffer *leaf;
2622         struct btrfs_path path;
2623         struct btrfs_dev_item *dev_item;
2624         struct btrfs_super_block *disk_super;
2625         char dev_uuid[BTRFS_UUID_SIZE];
2626         char fs_uuid[BTRFS_UUID_SIZE];
2627         u64 devid, type, io_align, io_width;
2628         u64 sector_size, total_bytes, bytes_used;
2629         char buf[BTRFS_SUPER_INFO_SIZE];
2630         int fp = -1;
2631         int ret;
2632
2633         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2634         key.type = BTRFS_DEV_ITEM_KEY;
2635         key.offset = cur_devid;
2636
2637         btrfs_init_path(&path);
2638         ret = btrfs_search_slot(NULL, info->chunk_root, &key, &path, 0, 0); 
2639         if (ret) {
2640                 error("search key failed: %d", ret);
2641                 ret = -EIO;
2642                 goto out;
2643         }
2644
2645         leaf = path.nodes[0];
2646         dev_item = btrfs_item_ptr(leaf, path.slots[0],
2647                                   struct btrfs_dev_item);
2648
2649         devid = btrfs_device_id(leaf, dev_item);
2650         if (devid != cur_devid) {
2651                 error("devid mismatch: %llu != %llu",
2652                                 (unsigned long long)devid,
2653                                 (unsigned long long)cur_devid);
2654                 ret = -EIO;
2655                 goto out;
2656         }
2657
2658         type = btrfs_device_type(leaf, dev_item);
2659         io_align = btrfs_device_io_align(leaf, dev_item);
2660         io_width = btrfs_device_io_width(leaf, dev_item);
2661         sector_size = btrfs_device_sector_size(leaf, dev_item);
2662         total_bytes = btrfs_device_total_bytes(leaf, dev_item);
2663         bytes_used = btrfs_device_bytes_used(leaf, dev_item);
2664         read_extent_buffer(leaf, dev_uuid, (unsigned long)btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE);
2665         read_extent_buffer(leaf, fs_uuid, (unsigned long)btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE);
2666
2667         btrfs_release_path(&path);
2668
2669         printf("update disk super on %s devid=%llu\n", other_dev, devid);
2670
2671         /* update other devices' super block */
2672         fp = open(other_dev, O_CREAT | O_RDWR, 0600);
2673         if (fp < 0) {
2674                 error("could not open %s: %s", other_dev, strerror(errno));
2675                 ret = -EIO;
2676                 goto out;
2677         }
2678
2679         memcpy(buf, info->super_copy, BTRFS_SUPER_INFO_SIZE);
2680
2681         disk_super = (struct btrfs_super_block *)buf;
2682         dev_item = &disk_super->dev_item;
2683
2684         btrfs_set_stack_device_type(dev_item, type);
2685         btrfs_set_stack_device_id(dev_item, devid);
2686         btrfs_set_stack_device_total_bytes(dev_item, total_bytes);
2687         btrfs_set_stack_device_bytes_used(dev_item, bytes_used);
2688         btrfs_set_stack_device_io_align(dev_item, io_align);
2689         btrfs_set_stack_device_io_width(dev_item, io_width);
2690         btrfs_set_stack_device_sector_size(dev_item, sector_size);
2691         memcpy(dev_item->uuid, dev_uuid, BTRFS_UUID_SIZE);
2692         memcpy(dev_item->fsid, fs_uuid, BTRFS_UUID_SIZE);
2693         csum_block((u8 *)buf, BTRFS_SUPER_INFO_SIZE);
2694
2695         ret = pwrite64(fp, buf, BTRFS_SUPER_INFO_SIZE, BTRFS_SUPER_INFO_OFFSET);
2696         if (ret != BTRFS_SUPER_INFO_SIZE) {
2697                 if (ret < 0)
2698                         error("cannot write superblock: %s", strerror(ret));
2699                 else
2700                         error("cannot write superblock");
2701                 ret = -EIO;
2702                 goto out;
2703         }
2704
2705         write_backup_supers(fp, (u8 *)buf);
2706
2707 out:
2708         if (fp != -1)
2709                 close(fp);
2710         return ret;
2711 }
2712
/* Print the btrfs-image usage text and exit with status @ret. */
static void print_usage(int ret)
{
	printf("usage: btrfs-image [options] source target\n"
	       "\t-r      \trestore metadump image\n"
	       "\t-c value\tcompression level (0 ~ 9)\n"
	       "\t-t value\tnumber of threads (1 ~ 32)\n"
	       "\t-o      \tdon't mess with the chunk tree when restoring\n"
	       "\t-s      \tsanitize file names, use once to just use garbage, use twice if you want crc collisions\n"
	       "\t-w      \twalk all trees instead of using extent tree, do this if your extent tree is broken\n"
	       "\t-m       \trestore for multiple devices\n"
	       "\n"
	       "\tIn the dump mode, source is the btrfs device and target is the output file (use '-' for stdout).\n"
	       "\tIn the restore mode, source is the dumped image and target is the btrfs device/file.\n");
	exit(ret);
}
2728
2729 int main(int argc, char *argv[])
2730 {
2731         char *source;
2732         char *target;
2733         u64 num_threads = 0;
2734         u64 compress_level = 0;
2735         int create = 1;
2736         int old_restore = 0;
2737         int walk_trees = 0;
2738         int multi_devices = 0;
2739         int ret;
2740         int sanitize = 0;
2741         int dev_cnt = 0;
2742         int usage_error = 0;
2743         FILE *out;
2744
2745         while (1) {
2746                 static const struct option long_options[] = {
2747                         { "help", no_argument, NULL, GETOPT_VAL_HELP},
2748                         { NULL, 0, NULL, 0 }
2749                 };
2750                 int c = getopt_long(argc, argv, "rc:t:oswm", long_options, NULL);
2751                 if (c < 0)
2752                         break;
2753                 switch (c) {
2754                 case 'r':
2755                         create = 0;
2756                         break;
2757                 case 't':
2758                         num_threads = arg_strtou64(optarg);
2759                         if (num_threads > MAX_WORKER_THREADS) {
2760                                 error("number of threads out of range: %llu > %d",
2761                                         (unsigned long long)num_threads,
2762                                         MAX_WORKER_THREADS);
2763                                 return 1;
2764                         }
2765                         break;
2766                 case 'c':
2767                         compress_level = arg_strtou64(optarg);
2768                         if (compress_level > 9) {
2769                                 error("compression level out of range: %llu",
2770                                         (unsigned long long)compress_level);
2771                                 return 1;
2772                         }
2773                         break;
2774                 case 'o':
2775                         old_restore = 1;
2776                         break;
2777                 case 's':
2778                         sanitize++;
2779                         break;
2780                 case 'w':
2781                         walk_trees = 1;
2782                         break;
2783                 case 'm':
2784                         create = 0;
2785                         multi_devices = 1;
2786                         break;
2787                         case GETOPT_VAL_HELP:
2788                 default:
2789                         print_usage(c != GETOPT_VAL_HELP);
2790                 }
2791         }
2792
2793         set_argv0(argv);
2794         if (check_argc_min(argc - optind, 2))
2795                 print_usage(1);
2796
2797         dev_cnt = argc - optind - 1;
2798
2799         if (create) {
2800                 if (old_restore) {
2801                         error(
2802                         "create and restore cannot be used at the same time");
2803                         usage_error++;
2804                 }
2805         } else {
2806                 if (walk_trees || sanitize || compress_level) {
2807                         error(
2808                         "useing -w, -s, -c options for restore makes no sense");
2809                         usage_error++;
2810                 }
2811                 if (multi_devices && dev_cnt < 2) {
2812                         error("not enough devices specified for -m option");
2813                         usage_error++;
2814                 }
2815                 if (!multi_devices && dev_cnt != 1) {
2816                         error("accepts only 1 device without -m option");
2817                         usage_error++;
2818                 }
2819         }
2820
2821         if (usage_error)
2822                 print_usage(1);
2823
2824         source = argv[optind];
2825         target = argv[optind + 1];
2826
2827         if (create && !strcmp(target, "-")) {
2828                 out = stdout;
2829         } else {
2830                 out = fopen(target, "w+");
2831                 if (!out) {
2832                         error("unable to create target file %s", target);
2833                         exit(1);
2834                 }
2835         }
2836
2837         if (compress_level > 0 || create == 0) {
2838                 if (num_threads == 0) {
2839                         long tmp = sysconf(_SC_NPROCESSORS_ONLN);
2840
2841                         if (tmp <= 0)
2842                                 tmp = 1;
2843                         num_threads = tmp;
2844                 }
2845         } else {
2846                 num_threads = 0;
2847         }
2848
2849         if (create) {
2850                 ret = check_mounted(source);
2851                 if (ret < 0) {
2852                         warning("unable to check mount status of: %s",
2853                                         strerror(-ret));
2854                 } else if (ret) {
2855                         warning("%s already mounted, results may be inaccurate",
2856                                         source);
2857                 }
2858
2859                 ret = create_metadump(source, out, num_threads,
2860                                       compress_level, sanitize, walk_trees);
2861         } else {
2862                 ret = restore_metadump(source, out, old_restore, num_threads,
2863                                        0, target, multi_devices);
2864         }
2865         if (ret) {
2866                 error("%s failed: %s", (create) ? "create" : "restore",
2867                        strerror(errno));
2868                 goto out;
2869         }
2870
2871          /* extended support for multiple devices */
2872         if (!create && multi_devices) {
2873                 struct btrfs_fs_info *info;
2874                 u64 total_devs;
2875                 int i;
2876
2877                 info = open_ctree_fs_info(target, 0, 0, 0,
2878                                           OPEN_CTREE_PARTIAL |
2879                                           OPEN_CTREE_RESTORE);
2880                 if (!info) {
2881                         error("open ctree failed at %s", target);
2882                         return 1;
2883                 }
2884
2885                 total_devs = btrfs_super_num_devices(info->super_copy);
2886                 if (total_devs != dev_cnt) {
2887                         error("it needs %llu devices but has only %d",
2888                                 total_devs, dev_cnt);
2889                         close_ctree(info->chunk_root);
2890                         goto out;
2891                 }
2892
2893                 /* update super block on other disks */
2894                 for (i = 2; i <= dev_cnt; i++) {
2895                         ret = update_disk_super_on_device(info,
2896                                         argv[optind + i], (u64)i);
2897                         if (ret) {
2898                                 error("update disk superblock failed devid %d: %d",
2899                                         i, ret);
2900                                 close_ctree(info->chunk_root);
2901                                 exit(1);
2902                         }
2903                 }
2904
2905                 close_ctree(info->chunk_root);
2906
2907                 /* fix metadata block to map correct chunk */
2908                 ret = restore_metadump(source, out, 0, num_threads, 1,
2909                                        target, 1);
2910                 if (ret) {
2911                         error("unable to fixup metadump: %d", ret);
2912                         exit(1);
2913                 }
2914         }
2915 out:
2916         if (out == stdout) {
2917                 fflush(out);
2918         } else {
2919                 fclose(out);
2920                 if (ret && create) {
2921                         int unlink_ret;
2922
2923                         unlink_ret = unlink(target);
2924                         if (unlink_ret)
2925                                 error("unlink output file %s failed: %s",
2926                                                 target, strerror(errno));
2927                 }
2928         }
2929
2930         btrfs_close_all_devices();
2931
2932         return !!ret;
2933 }