c743cb4d59773d73d83ae24b2ec50d93c73f1722
[platform/upstream/btrfs-progs.git] / image / main.c
1 /*
2  * Copyright (C) 2008 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <pthread.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <fcntl.h>
25 #include <unistd.h>
26 #include <dirent.h>
27 #include <zlib.h>
28 #include <getopt.h>
29
30 #include "kerncompat.h"
31 #include "crc32c.h"
32 #include "ctree.h"
33 #include "disk-io.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "volumes.h"
37 #include "extent_io.h"
38 #include "help.h"
39
40 #define HEADER_MAGIC            0xbd5c25e27295668bULL
41 #define MAX_PENDING_SIZE        (256 * 1024)
42 #define BLOCK_SIZE              1024
43 #define BLOCK_MASK              (BLOCK_SIZE - 1)
44
45 #define COMPRESS_NONE           0
46 #define COMPRESS_ZLIB           1
47
48 #define MAX_WORKER_THREADS      (32)
49
50 struct meta_cluster_item {
51         __le64 bytenr;
52         __le32 size;
53 } __attribute__ ((__packed__));
54
55 struct meta_cluster_header {
56         __le64 magic;
57         __le64 bytenr;
58         __le32 nritems;
59         u8 compress;
60 } __attribute__ ((__packed__));
61
62 /* cluster header + index items + buffers */
63 struct meta_cluster {
64         struct meta_cluster_header header;
65         struct meta_cluster_item items[];
66 } __attribute__ ((__packed__));
67
68 #define ITEMS_PER_CLUSTER ((BLOCK_SIZE - sizeof(struct meta_cluster)) / \
69                            sizeof(struct meta_cluster_item))
70
71 struct fs_chunk {
72         u64 logical;
73         u64 physical;
74         /*
75          * physical_dup only store additonal physical for BTRFS_BLOCK_GROUP_DUP
76          * currently restore only support single and DUP
77          * TODO: modify this structure and the function related to this
78          * structure for support RAID*
79          */
80         u64 physical_dup;
81         u64 bytes;
82         struct rb_node l;
83         struct rb_node p;
84         struct list_head list;
85 };
86
87 struct async_work {
88         struct list_head list;
89         struct list_head ordered;
90         u64 start;
91         u64 size;
92         u8 *buffer;
93         size_t bufsize;
94         int error;
95 };
96
97 struct metadump_struct {
98         struct btrfs_root *root;
99         FILE *out;
100
101         union {
102                 struct meta_cluster cluster;
103                 char meta_cluster_bytes[BLOCK_SIZE];
104         };
105
106         pthread_t threads[MAX_WORKER_THREADS];
107         size_t num_threads;
108         pthread_mutex_t mutex;
109         pthread_cond_t cond;
110         struct rb_root name_tree;
111
112         struct list_head list;
113         struct list_head ordered;
114         size_t num_items;
115         size_t num_ready;
116
117         u64 pending_start;
118         u64 pending_size;
119
120         int compress_level;
121         int done;
122         int data;
123         int sanitize_names;
124
125         int error;
126 };
127
128 struct name {
129         struct rb_node n;
130         char *val;
131         char *sub;
132         u32 len;
133 };
134
135 struct mdrestore_struct {
136         FILE *in;
137         FILE *out;
138
139         pthread_t threads[MAX_WORKER_THREADS];
140         size_t num_threads;
141         pthread_mutex_t mutex;
142         pthread_cond_t cond;
143
144         struct rb_root chunk_tree;
145         struct rb_root physical_tree;
146         struct list_head list;
147         struct list_head overlapping_chunks;
148         size_t num_items;
149         u32 nodesize;
150         u64 devid;
151         u64 alloced_chunks;
152         u64 last_physical_offset;
153         u8 uuid[BTRFS_UUID_SIZE];
154         u8 fsid[BTRFS_FSID_SIZE];
155
156         int compress_method;
157         int done;
158         int error;
159         int old_restore;
160         int fixup_offset;
161         int multi_devices;
162         int clear_space_cache;
163         struct btrfs_fs_info *info;
164 };
165
166 static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
167                                    u64 search, u64 cluster_bytenr);
168 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size);
169
170 static void csum_block(u8 *buf, size_t len)
171 {
172         u8 result[BTRFS_CRC32_SIZE];
173         u32 crc = ~(u32)0;
174         crc = crc32c(crc, buf + BTRFS_CSUM_SIZE, len - BTRFS_CSUM_SIZE);
175         btrfs_csum_final(crc, result);
176         memcpy(buf, result, BTRFS_CRC32_SIZE);
177 }
178
179 static int has_name(struct btrfs_key *key)
180 {
181         switch (key->type) {
182         case BTRFS_DIR_ITEM_KEY:
183         case BTRFS_DIR_INDEX_KEY:
184         case BTRFS_INODE_REF_KEY:
185         case BTRFS_INODE_EXTREF_KEY:
186         case BTRFS_XATTR_ITEM_KEY:
187                 return 1;
188         default:
189                 break;
190         }
191
192         return 0;
193 }
194
195 static char *generate_garbage(u32 name_len)
196 {
197         char *buf = malloc(name_len);
198         int i;
199
200         if (!buf)
201                 return NULL;
202
203         for (i = 0; i < name_len; i++) {
204                 char c = rand_range(94) + 33;
205
206                 if (c == '/')
207                         c++;
208                 buf[i] = c;
209         }
210
211         return buf;
212 }
213
214 static int name_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
215 {
216         struct name *entry = rb_entry(a, struct name, n);
217         struct name *ins = rb_entry(b, struct name, n);
218         u32 len;
219
220         len = min(ins->len, entry->len);
221         return memcmp(ins->val, entry->val, len);
222 }
223
224 static int chunk_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
225 {
226         struct fs_chunk *entry = rb_entry(a, struct fs_chunk, l);
227         struct fs_chunk *ins = rb_entry(b, struct fs_chunk, l);
228
229         if (fuzz && ins->logical >= entry->logical &&
230             ins->logical < entry->logical + entry->bytes)
231                 return 0;
232
233         if (ins->logical < entry->logical)
234                 return -1;
235         else if (ins->logical > entry->logical)
236                 return 1;
237         return 0;
238 }
239
240 static int physical_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
241 {
242         struct fs_chunk *entry = rb_entry(a, struct fs_chunk, p);
243         struct fs_chunk *ins = rb_entry(b, struct fs_chunk, p);
244
245         if (fuzz && ins->physical >= entry->physical &&
246             ins->physical < entry->physical + entry->bytes)
247                 return 0;
248
249         if (fuzz && entry->physical >= ins->physical &&
250             entry->physical < ins->physical + ins->bytes)
251                 return 0;
252
253         if (ins->physical < entry->physical)
254                 return -1;
255         else if (ins->physical > entry->physical)
256                 return 1;
257         return 0;
258 }
259
260 static void tree_insert(struct rb_root *root, struct rb_node *ins,
261                         int (*cmp)(struct rb_node *a, struct rb_node *b,
262                                    int fuzz))
263 {
264         struct rb_node ** p = &root->rb_node;
265         struct rb_node * parent = NULL;
266         int dir;
267
268         while(*p) {
269                 parent = *p;
270
271                 dir = cmp(*p, ins, 1);
272                 if (dir < 0)
273                         p = &(*p)->rb_left;
274                 else if (dir > 0)
275                         p = &(*p)->rb_right;
276                 else
277                         BUG();
278         }
279
280         rb_link_node(ins, parent, p);
281         rb_insert_color(ins, root);
282 }
283
284 static struct rb_node *tree_search(struct rb_root *root,
285                                    struct rb_node *search,
286                                    int (*cmp)(struct rb_node *a,
287                                               struct rb_node *b, int fuzz),
288                                    int fuzz)
289 {
290         struct rb_node *n = root->rb_node;
291         int dir;
292
293         while (n) {
294                 dir = cmp(n, search, fuzz);
295                 if (dir < 0)
296                         n = n->rb_left;
297                 else if (dir > 0)
298                         n = n->rb_right;
299                 else
300                         return n;
301         }
302
303         return NULL;
304 }
305
306 static u64 logical_to_physical(struct mdrestore_struct *mdres, u64 logical,
307                                u64 *size, u64 *physical_dup)
308 {
309         struct fs_chunk *fs_chunk;
310         struct rb_node *entry;
311         struct fs_chunk search;
312         u64 offset;
313
314         if (logical == BTRFS_SUPER_INFO_OFFSET)
315                 return logical;
316
317         search.logical = logical;
318         entry = tree_search(&mdres->chunk_tree, &search.l, chunk_cmp, 1);
319         if (!entry) {
320                 if (mdres->in != stdin)
321                         warning("cannot find a chunk, using logical");
322                 return logical;
323         }
324         fs_chunk = rb_entry(entry, struct fs_chunk, l);
325         if (fs_chunk->logical > logical || fs_chunk->logical + fs_chunk->bytes < logical)
326                 BUG();
327         offset = search.logical - fs_chunk->logical;
328
329         if (physical_dup) {
330                 /* Only in dup case, physical_dup is not equal to 0 */
331                 if (fs_chunk->physical_dup)
332                         *physical_dup = fs_chunk->physical_dup + offset;
333                 else
334                         *physical_dup = 0;
335         }
336
337         *size = min(*size, fs_chunk->bytes + fs_chunk->logical - logical);
338         return fs_chunk->physical + offset;
339 }
340
341 /*
342  * Reverse CRC-32C table
343  */
344 static const u32 crc32c_rev_table[256] = {
345         0x00000000L,0x05EC76F1L,0x0BD8EDE2L,0x0E349B13L,
346         0x17B1DBC4L,0x125DAD35L,0x1C693626L,0x198540D7L,
347         0x2F63B788L,0x2A8FC179L,0x24BB5A6AL,0x21572C9BL,
348         0x38D26C4CL,0x3D3E1ABDL,0x330A81AEL,0x36E6F75FL,
349         0x5EC76F10L,0x5B2B19E1L,0x551F82F2L,0x50F3F403L,
350         0x4976B4D4L,0x4C9AC225L,0x42AE5936L,0x47422FC7L,
351         0x71A4D898L,0x7448AE69L,0x7A7C357AL,0x7F90438BL,
352         0x6615035CL,0x63F975ADL,0x6DCDEEBEL,0x6821984FL,
353         0xBD8EDE20L,0xB862A8D1L,0xB65633C2L,0xB3BA4533L,
354         0xAA3F05E4L,0xAFD37315L,0xA1E7E806L,0xA40B9EF7L,
355         0x92ED69A8L,0x97011F59L,0x9935844AL,0x9CD9F2BBL,
356         0x855CB26CL,0x80B0C49DL,0x8E845F8EL,0x8B68297FL,
357         0xE349B130L,0xE6A5C7C1L,0xE8915CD2L,0xED7D2A23L,
358         0xF4F86AF4L,0xF1141C05L,0xFF208716L,0xFACCF1E7L,
359         0xCC2A06B8L,0xC9C67049L,0xC7F2EB5AL,0xC21E9DABL,
360         0xDB9BDD7CL,0xDE77AB8DL,0xD043309EL,0xD5AF466FL,
361         0x7EF1CAB1L,0x7B1DBC40L,0x75292753L,0x70C551A2L,
362         0x69401175L,0x6CAC6784L,0x6298FC97L,0x67748A66L,
363         0x51927D39L,0x547E0BC8L,0x5A4A90DBL,0x5FA6E62AL,
364         0x4623A6FDL,0x43CFD00CL,0x4DFB4B1FL,0x48173DEEL,
365         0x2036A5A1L,0x25DAD350L,0x2BEE4843L,0x2E023EB2L,
366         0x37877E65L,0x326B0894L,0x3C5F9387L,0x39B3E576L,
367         0x0F551229L,0x0AB964D8L,0x048DFFCBL,0x0161893AL,
368         0x18E4C9EDL,0x1D08BF1CL,0x133C240FL,0x16D052FEL,
369         0xC37F1491L,0xC6936260L,0xC8A7F973L,0xCD4B8F82L,
370         0xD4CECF55L,0xD122B9A4L,0xDF1622B7L,0xDAFA5446L,
371         0xEC1CA319L,0xE9F0D5E8L,0xE7C44EFBL,0xE228380AL,
372         0xFBAD78DDL,0xFE410E2CL,0xF075953FL,0xF599E3CEL,
373         0x9DB87B81L,0x98540D70L,0x96609663L,0x938CE092L,
374         0x8A09A045L,0x8FE5D6B4L,0x81D14DA7L,0x843D3B56L,
375         0xB2DBCC09L,0xB737BAF8L,0xB90321EBL,0xBCEF571AL,
376         0xA56A17CDL,0xA086613CL,0xAEB2FA2FL,0xAB5E8CDEL,
377         0xFDE39562L,0xF80FE393L,0xF63B7880L,0xF3D70E71L,
378         0xEA524EA6L,0xEFBE3857L,0xE18AA344L,0xE466D5B5L,
379         0xD28022EAL,0xD76C541BL,0xD958CF08L,0xDCB4B9F9L,
380         0xC531F92EL,0xC0DD8FDFL,0xCEE914CCL,0xCB05623DL,
381         0xA324FA72L,0xA6C88C83L,0xA8FC1790L,0xAD106161L,
382         0xB49521B6L,0xB1795747L,0xBF4DCC54L,0xBAA1BAA5L,
383         0x8C474DFAL,0x89AB3B0BL,0x879FA018L,0x8273D6E9L,
384         0x9BF6963EL,0x9E1AE0CFL,0x902E7BDCL,0x95C20D2DL,
385         0x406D4B42L,0x45813DB3L,0x4BB5A6A0L,0x4E59D051L,
386         0x57DC9086L,0x5230E677L,0x5C047D64L,0x59E80B95L,
387         0x6F0EFCCAL,0x6AE28A3BL,0x64D61128L,0x613A67D9L,
388         0x78BF270EL,0x7D5351FFL,0x7367CAECL,0x768BBC1DL,
389         0x1EAA2452L,0x1B4652A3L,0x1572C9B0L,0x109EBF41L,
390         0x091BFF96L,0x0CF78967L,0x02C31274L,0x072F6485L,
391         0x31C993DAL,0x3425E52BL,0x3A117E38L,0x3FFD08C9L,
392         0x2678481EL,0x23943EEFL,0x2DA0A5FCL,0x284CD30DL,
393         0x83125FD3L,0x86FE2922L,0x88CAB231L,0x8D26C4C0L,
394         0x94A38417L,0x914FF2E6L,0x9F7B69F5L,0x9A971F04L,
395         0xAC71E85BL,0xA99D9EAAL,0xA7A905B9L,0xA2457348L,
396         0xBBC0339FL,0xBE2C456EL,0xB018DE7DL,0xB5F4A88CL,
397         0xDDD530C3L,0xD8394632L,0xD60DDD21L,0xD3E1ABD0L,
398         0xCA64EB07L,0xCF889DF6L,0xC1BC06E5L,0xC4507014L,
399         0xF2B6874BL,0xF75AF1BAL,0xF96E6AA9L,0xFC821C58L,
400         0xE5075C8FL,0xE0EB2A7EL,0xEEDFB16DL,0xEB33C79CL,
401         0x3E9C81F3L,0x3B70F702L,0x35446C11L,0x30A81AE0L,
402         0x292D5A37L,0x2CC12CC6L,0x22F5B7D5L,0x2719C124L,
403         0x11FF367BL,0x1413408AL,0x1A27DB99L,0x1FCBAD68L,
404         0x064EEDBFL,0x03A29B4EL,0x0D96005DL,0x087A76ACL,
405         0x605BEEE3L,0x65B79812L,0x6B830301L,0x6E6F75F0L,
406         0x77EA3527L,0x720643D6L,0x7C32D8C5L,0x79DEAE34L,
407         0x4F38596BL,0x4AD42F9AL,0x44E0B489L,0x410CC278L,
408         0x588982AFL,0x5D65F45EL,0x53516F4DL,0x56BD19BCL
409 };
410
411 /*
412  * Calculate a 4-byte suffix to match desired CRC32C
413  *
414  * @current_crc: CRC32C checksum of all bytes before the suffix
415  * @desired_crc: the checksum that we want to get after adding the suffix
416  *
417  * Outputs: @suffix: pointer to where the suffix will be written (4-bytes)
418  */
419 static void find_collision_calc_suffix(unsigned long current_crc,
420                                        unsigned long desired_crc,
421                                        char *suffix)
422 {
423         int i;
424
425         for(i = 3; i >= 0; i--) {
426                 desired_crc = (desired_crc << 8)
427                             ^ crc32c_rev_table[desired_crc >> 24 & 0xFF]
428                             ^ ((current_crc >> i * 8) & 0xFF);
429         }
430         for (i = 0; i < 4; i++)
431                 suffix[i] = (desired_crc >> i * 8) & 0xFF;
432 }
433
434 /*
435  * Check if suffix is valid according to our file name conventions
436  */
437 static int find_collision_is_suffix_valid(const char *suffix)
438 {
439         int i;
440         char c;
441
442         for (i = 0; i < 4; i++) {
443                 c = suffix[i];
444                 if (c < ' ' || c > 126 || c == '/')
445                         return 0;
446         }
447         return 1;
448 }
449
450 static int find_collision_brute_force(struct name *val, u32 name_len)
451 {
452         unsigned long checksum;
453         int found = 0;
454         int i;
455
456         checksum = crc32c(~1, val->val, name_len);
457         memset(val->sub, ' ', name_len);
458         i = 0;
459         while (1) {
460                 if (crc32c(~1, val->sub, name_len) == checksum &&
461                     memcmp(val->sub, val->val, val->len)) {
462                         found = 1;
463                         break;
464                 }
465
466                 if (val->sub[i] == 126) {
467                         do {
468                                 i++;
469                                 if (i >= name_len)
470                                         break;
471                         } while (val->sub[i] == 126);
472
473                         if (i >= name_len)
474                                 break;
475                         val->sub[i]++;
476                         if (val->sub[i] == '/')
477                                 val->sub[i]++;
478                         memset(val->sub, ' ', i);
479                         i = 0;
480                         continue;
481                 } else {
482                         val->sub[i]++;
483                         if (val->sub[i] == '/')
484                                 val->sub[i]++;
485                 }
486         }
487         return found;
488 }
489
490 static char *find_collision(struct metadump_struct *md, char *name,
491                             u32 name_len)
492 {
493         struct name *val;
494         struct rb_node *entry;
495         struct name tmp;
496         int found;
497         int i;
498
499         tmp.val = name;
500         tmp.len = name_len;
501         entry = tree_search(&md->name_tree, &tmp.n, name_cmp, 0);
502         if (entry) {
503                 val = rb_entry(entry, struct name, n);
504                 free(name);
505                 return val->sub;
506         }
507
508         val = malloc(sizeof(struct name));
509         if (!val) {
510                 error("cannot sanitize name, not enough memory");
511                 free(name);
512                 return NULL;
513         }
514
515         memset(val, 0, sizeof(*val));
516
517         val->val = name;
518         val->len = name_len;
519         val->sub = malloc(name_len);
520         if (!val->sub) {
521                 error("cannot sanitize name, not enough memory");
522                 free(val);
523                 free(name);
524                 return NULL;
525         }
526
527         found = find_collision_brute_force(val, name_len);
528
529         if (!found) {
530                 warning(
531 "cannot find a hash collision for '%.*s', generating garbage, it won't match indexes",
532                         val->len, val->val);
533                 for (i = 0; i < name_len; i++) {
534                         char c = rand_range(94) + 33;
535
536                         if (c == '/')
537                                 c++;
538                         val->sub[i] = c;
539                 }
540         }
541
542         tree_insert(&md->name_tree, &val->n, name_cmp);
543         return val->sub;
544 }
545
546 static void sanitize_dir_item(struct metadump_struct *md, struct extent_buffer *eb,
547                               int slot)
548 {
549         struct btrfs_dir_item *dir_item;
550         char *buf;
551         char *garbage;
552         unsigned long name_ptr;
553         u32 total_len;
554         u32 cur = 0;
555         u32 this_len;
556         u32 name_len;
557         int free_garbage = (md->sanitize_names == 1);
558
559         dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
560         total_len = btrfs_item_size_nr(eb, slot);
561         while (cur < total_len) {
562                 this_len = sizeof(*dir_item) +
563                         btrfs_dir_name_len(eb, dir_item) +
564                         btrfs_dir_data_len(eb, dir_item);
565                 name_ptr = (unsigned long)(dir_item + 1);
566                 name_len = btrfs_dir_name_len(eb, dir_item);
567
568                 if (md->sanitize_names > 1) {
569                         buf = malloc(name_len);
570                         if (!buf) {
571                                 error("cannot sanitize name, not enough memory");
572                                 return;
573                         }
574                         read_extent_buffer(eb, buf, name_ptr, name_len);
575                         garbage = find_collision(md, buf, name_len);
576                 } else {
577                         garbage = generate_garbage(name_len);
578                 }
579                 if (!garbage) {
580                         error("cannot sanitize name, not enough memory");
581                         return;
582                 }
583                 write_extent_buffer(eb, garbage, name_ptr, name_len);
584                 cur += this_len;
585                 dir_item = (struct btrfs_dir_item *)((char *)dir_item +
586                                                      this_len);
587                 if (free_garbage)
588                         free(garbage);
589         }
590 }
591
592 static void sanitize_inode_ref(struct metadump_struct *md,
593                                struct extent_buffer *eb, int slot, int ext)
594 {
595         struct btrfs_inode_extref *extref;
596         struct btrfs_inode_ref *ref;
597         char *garbage, *buf;
598         unsigned long ptr;
599         unsigned long name_ptr;
600         u32 item_size;
601         u32 cur_offset = 0;
602         int len;
603         int free_garbage = (md->sanitize_names == 1);
604
605         item_size = btrfs_item_size_nr(eb, slot);
606         ptr = btrfs_item_ptr_offset(eb, slot);
607         while (cur_offset < item_size) {
608                 if (ext) {
609                         extref = (struct btrfs_inode_extref *)(ptr +
610                                                                cur_offset);
611                         name_ptr = (unsigned long)(&extref->name);
612                         len = btrfs_inode_extref_name_len(eb, extref);
613                         cur_offset += sizeof(*extref);
614                 } else {
615                         ref = (struct btrfs_inode_ref *)(ptr + cur_offset);
616                         len = btrfs_inode_ref_name_len(eb, ref);
617                         name_ptr = (unsigned long)(ref + 1);
618                         cur_offset += sizeof(*ref);
619                 }
620                 cur_offset += len;
621
622                 if (md->sanitize_names > 1) {
623                         buf = malloc(len);
624                         if (!buf) {
625                                 error("cannot sanitize name, not enough memory");
626                                 return;
627                         }
628                         read_extent_buffer(eb, buf, name_ptr, len);
629                         garbage = find_collision(md, buf, len);
630                 } else {
631                         garbage = generate_garbage(len);
632                 }
633
634                 if (!garbage) {
635                         error("cannot sanitize name, not enough memory");
636                         return;
637                 }
638                 write_extent_buffer(eb, garbage, name_ptr, len);
639                 if (free_garbage)
640                         free(garbage);
641         }
642 }
643
644 static void sanitize_xattr(struct metadump_struct *md,
645                            struct extent_buffer *eb, int slot)
646 {
647         struct btrfs_dir_item *dir_item;
648         unsigned long data_ptr;
649         u32 data_len;
650
651         dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
652         data_len = btrfs_dir_data_len(eb, dir_item);
653
654         data_ptr = (unsigned long)((char *)(dir_item + 1) +
655                                    btrfs_dir_name_len(eb, dir_item));
656         memset_extent_buffer(eb, 0, data_ptr, data_len);
657 }
658
659 static void sanitize_name(struct metadump_struct *md, u8 *dst,
660                           struct extent_buffer *src, struct btrfs_key *key,
661                           int slot)
662 {
663         struct extent_buffer *eb;
664
665         eb = alloc_dummy_eb(src->start, src->len);
666         if (!eb) {
667                 error("cannot sanitize name, not enough memory");
668                 return;
669         }
670
671         memcpy(eb->data, src->data, src->len);
672
673         switch (key->type) {
674         case BTRFS_DIR_ITEM_KEY:
675         case BTRFS_DIR_INDEX_KEY:
676                 sanitize_dir_item(md, eb, slot);
677                 break;
678         case BTRFS_INODE_REF_KEY:
679                 sanitize_inode_ref(md, eb, slot, 0);
680                 break;
681         case BTRFS_INODE_EXTREF_KEY:
682                 sanitize_inode_ref(md, eb, slot, 1);
683                 break;
684         case BTRFS_XATTR_ITEM_KEY:
685                 sanitize_xattr(md, eb, slot);
686                 break;
687         default:
688                 break;
689         }
690
691         memcpy(dst, eb->data, eb->len);
692         free(eb);
693 }
694
695 /*
696  * zero inline extents and csum items
697  */
698 static void zero_items(struct metadump_struct *md, u8 *dst,
699                        struct extent_buffer *src)
700 {
701         struct btrfs_file_extent_item *fi;
702         struct btrfs_item *item;
703         struct btrfs_key key;
704         u32 nritems = btrfs_header_nritems(src);
705         size_t size;
706         unsigned long ptr;
707         int i, extent_type;
708
709         for (i = 0; i < nritems; i++) {
710                 item = btrfs_item_nr(i);
711                 btrfs_item_key_to_cpu(src, &key, i);
712                 if (key.type == BTRFS_CSUM_ITEM_KEY) {
713                         size = btrfs_item_size_nr(src, i);
714                         memset(dst + btrfs_leaf_data(src) +
715                                btrfs_item_offset_nr(src, i), 0, size);
716                         continue;
717                 }
718
719                 if (md->sanitize_names && has_name(&key)) {
720                         sanitize_name(md, dst, src, &key, i);
721                         continue;
722                 }
723
724                 if (key.type != BTRFS_EXTENT_DATA_KEY)
725                         continue;
726
727                 fi = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
728                 extent_type = btrfs_file_extent_type(src, fi);
729                 if (extent_type != BTRFS_FILE_EXTENT_INLINE)
730                         continue;
731
732                 ptr = btrfs_file_extent_inline_start(fi);
733                 size = btrfs_file_extent_inline_item_len(src, item);
734                 memset(dst + ptr, 0, size);
735         }
736 }
737
738 /*
739  * copy buffer and zero useless data in the buffer
740  */
741 static void copy_buffer(struct metadump_struct *md, u8 *dst,
742                         struct extent_buffer *src)
743 {
744         int level;
745         size_t size;
746         u32 nritems;
747
748         memcpy(dst, src->data, src->len);
749         if (src->start == BTRFS_SUPER_INFO_OFFSET)
750                 return;
751
752         level = btrfs_header_level(src);
753         nritems = btrfs_header_nritems(src);
754
755         if (nritems == 0) {
756                 size = sizeof(struct btrfs_header);
757                 memset(dst + size, 0, src->len - size);
758         } else if (level == 0) {
759                 size = btrfs_leaf_data(src) +
760                         btrfs_item_offset_nr(src, nritems - 1) -
761                         btrfs_item_nr_offset(nritems);
762                 memset(dst + btrfs_item_nr_offset(nritems), 0, size);
763                 zero_items(md, dst, src);
764         } else {
765                 size = offsetof(struct btrfs_node, ptrs) +
766                         sizeof(struct btrfs_key_ptr) * nritems;
767                 memset(dst + size, 0, src->len - size);
768         }
769         csum_block(dst, src->len);
770 }
771
772 static void *dump_worker(void *data)
773 {
774         struct metadump_struct *md = (struct metadump_struct *)data;
775         struct async_work *async;
776         int ret;
777
778         while (1) {
779                 pthread_mutex_lock(&md->mutex);
780                 while (list_empty(&md->list)) {
781                         if (md->done) {
782                                 pthread_mutex_unlock(&md->mutex);
783                                 goto out;
784                         }
785                         pthread_cond_wait(&md->cond, &md->mutex);
786                 }
787                 async = list_entry(md->list.next, struct async_work, list);
788                 list_del_init(&async->list);
789                 pthread_mutex_unlock(&md->mutex);
790
791                 if (md->compress_level > 0) {
792                         u8 *orig = async->buffer;
793
794                         async->bufsize = compressBound(async->size);
795                         async->buffer = malloc(async->bufsize);
796                         if (!async->buffer) {
797                                 error("not enough memory for async buffer");
798                                 pthread_mutex_lock(&md->mutex);
799                                 if (!md->error)
800                                         md->error = -ENOMEM;
801                                 pthread_mutex_unlock(&md->mutex);
802                                 pthread_exit(NULL);
803                         }
804
805                         ret = compress2(async->buffer,
806                                          (unsigned long *)&async->bufsize,
807                                          orig, async->size, md->compress_level);
808
809                         if (ret != Z_OK)
810                                 async->error = 1;
811
812                         free(orig);
813                 }
814
815                 pthread_mutex_lock(&md->mutex);
816                 md->num_ready++;
817                 pthread_mutex_unlock(&md->mutex);
818         }
819 out:
820         pthread_exit(NULL);
821 }
822
823 static void meta_cluster_init(struct metadump_struct *md, u64 start)
824 {
825         struct meta_cluster_header *header;
826
827         md->num_items = 0;
828         md->num_ready = 0;
829         header = &md->cluster.header;
830         header->magic = cpu_to_le64(HEADER_MAGIC);
831         header->bytenr = cpu_to_le64(start);
832         header->nritems = cpu_to_le32(0);
833         header->compress = md->compress_level > 0 ?
834                            COMPRESS_ZLIB : COMPRESS_NONE;
835 }
836
837 static void metadump_destroy(struct metadump_struct *md, int num_threads)
838 {
839         int i;
840         struct rb_node *n;
841
842         pthread_mutex_lock(&md->mutex);
843         md->done = 1;
844         pthread_cond_broadcast(&md->cond);
845         pthread_mutex_unlock(&md->mutex);
846
847         for (i = 0; i < num_threads; i++)
848                 pthread_join(md->threads[i], NULL);
849
850         pthread_cond_destroy(&md->cond);
851         pthread_mutex_destroy(&md->mutex);
852
853         while ((n = rb_first(&md->name_tree))) {
854                 struct name *name;
855
856                 name = rb_entry(n, struct name, n);
857                 rb_erase(n, &md->name_tree);
858                 free(name->val);
859                 free(name->sub);
860                 free(name);
861         }
862 }
863
864 static int metadump_init(struct metadump_struct *md, struct btrfs_root *root,
865                          FILE *out, int num_threads, int compress_level,
866                          int sanitize_names)
867 {
868         int i, ret = 0;
869
870         memset(md, 0, sizeof(*md));
871         INIT_LIST_HEAD(&md->list);
872         INIT_LIST_HEAD(&md->ordered);
873         md->root = root;
874         md->out = out;
875         md->pending_start = (u64)-1;
876         md->compress_level = compress_level;
877         md->sanitize_names = sanitize_names;
878         if (sanitize_names > 1)
879                 crc32c_optimization_init();
880
881         md->name_tree.rb_node = NULL;
882         md->num_threads = num_threads;
883         pthread_cond_init(&md->cond, NULL);
884         pthread_mutex_init(&md->mutex, NULL);
885         meta_cluster_init(md, 0);
886
887         if (!num_threads)
888                 return 0;
889
890         for (i = 0; i < num_threads; i++) {
891                 ret = pthread_create(md->threads + i, NULL, dump_worker, md);
892                 if (ret)
893                         break;
894         }
895
896         if (ret)
897                 metadump_destroy(md, i + 1);
898
899         return ret;
900 }
901
902 static int write_zero(FILE *out, size_t size)
903 {
904         static char zero[BLOCK_SIZE];
905         return fwrite(zero, size, 1, out);
906 }
907
908 static int write_buffers(struct metadump_struct *md, u64 *next)
909 {
910         struct meta_cluster_header *header = &md->cluster.header;
911         struct meta_cluster_item *item;
912         struct async_work *async;
913         u64 bytenr = 0;
914         u32 nritems = 0;
915         int ret;
916         int err = 0;
917
918         if (list_empty(&md->ordered))
919                 goto out;
920
921         /* wait until all buffers are compressed */
922         while (!err && md->num_items > md->num_ready) {
923                 struct timespec ts = {
924                         .tv_sec = 0,
925                         .tv_nsec = 10000000,
926                 };
927                 pthread_mutex_unlock(&md->mutex);
928                 nanosleep(&ts, NULL);
929                 pthread_mutex_lock(&md->mutex);
930                 err = md->error;
931         }
932
933         if (err) {
934                 error("one of the threads failed: %s", strerror(-err));
935                 goto out;
936         }
937
938         /* setup and write index block */
939         list_for_each_entry(async, &md->ordered, ordered) {
940                 item = &md->cluster.items[nritems];
941                 item->bytenr = cpu_to_le64(async->start);
942                 item->size = cpu_to_le32(async->bufsize);
943                 nritems++;
944         }
945         header->nritems = cpu_to_le32(nritems);
946
947         ret = fwrite(&md->cluster, BLOCK_SIZE, 1, md->out);
948         if (ret != 1) {
949                 error("unable to write out cluster: %s", strerror(errno));
950                 return -errno;
951         }
952
953         /* write buffers */
954         bytenr += le64_to_cpu(header->bytenr) + BLOCK_SIZE;
955         while (!list_empty(&md->ordered)) {
956                 async = list_entry(md->ordered.next, struct async_work,
957                                    ordered);
958                 list_del_init(&async->ordered);
959
960                 bytenr += async->bufsize;
961                 if (!err)
962                         ret = fwrite(async->buffer, async->bufsize, 1,
963                                      md->out);
964                 if (ret != 1) {
965                         error("unable to write out cluster: %s",
966                                 strerror(errno));
967                         err = -errno;
968                         ret = 0;
969                 }
970
971                 free(async->buffer);
972                 free(async);
973         }
974
975         /* zero unused space in the last block */
976         if (!err && bytenr & BLOCK_MASK) {
977                 size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);
978
979                 bytenr += size;
980                 ret = write_zero(md->out, size);
981                 if (ret != 1) {
982                         error("unable to zero out buffer: %s",
983                                 strerror(errno));
984                         err = -errno;
985                 }
986         }
987 out:
988         *next = bytenr;
989         return err;
990 }
991
992 static int read_data_extent(struct metadump_struct *md,
993                             struct async_work *async)
994 {
995         struct btrfs_root *root = md->root;
996         struct btrfs_fs_info *fs_info = root->fs_info;
997         u64 bytes_left = async->size;
998         u64 logical = async->start;
999         u64 offset = 0;
1000         u64 read_len;
1001         int num_copies;
1002         int cur_mirror;
1003         int ret;
1004
1005         num_copies = btrfs_num_copies(root->fs_info, logical, bytes_left);
1006
1007         /* Try our best to read data, just like read_tree_block() */
1008         for (cur_mirror = 0; cur_mirror < num_copies; cur_mirror++) {
1009                 while (bytes_left) {
1010                         read_len = bytes_left;
1011                         ret = read_extent_data(fs_info,
1012                                         (char *)(async->buffer + offset),
1013                                         logical, &read_len, cur_mirror);
1014                         if (ret < 0)
1015                                 break;
1016                         offset += read_len;
1017                         logical += read_len;
1018                         bytes_left -= read_len;
1019                 }
1020         }
1021         if (bytes_left)
1022                 return -EIO;
1023         return 0;
1024 }
1025
1026 static int get_dev_fd(struct btrfs_root *root)
1027 {
1028         struct btrfs_device *dev;
1029
1030         dev = list_first_entry(&root->fs_info->fs_devices->devices,
1031                                struct btrfs_device, dev_list);
1032         return dev->fd;
1033 }
1034
1035 static int flush_pending(struct metadump_struct *md, int done)
1036 {
1037         struct async_work *async = NULL;
1038         struct extent_buffer *eb;
1039         u64 start = 0;
1040         u64 size;
1041         size_t offset;
1042         int ret = 0;
1043
1044         if (md->pending_size) {
1045                 async = calloc(1, sizeof(*async));
1046                 if (!async)
1047                         return -ENOMEM;
1048
1049                 async->start = md->pending_start;
1050                 async->size = md->pending_size;
1051                 async->bufsize = async->size;
1052                 async->buffer = malloc(async->bufsize);
1053                 if (!async->buffer) {
1054                         free(async);
1055                         return -ENOMEM;
1056                 }
1057                 offset = 0;
1058                 start = async->start;
1059                 size = async->size;
1060
1061                 if (md->data) {
1062                         ret = read_data_extent(md, async);
1063                         if (ret) {
1064                                 free(async->buffer);
1065                                 free(async);
1066                                 return ret;
1067                         }
1068                 }
1069
1070                 /*
1071                  * Balance can make the mapping not cover the super block, so
1072                  * just copy directly from one of the devices.
1073                  */
1074                 if (start == BTRFS_SUPER_INFO_OFFSET) {
1075                         int fd = get_dev_fd(md->root);
1076
1077                         ret = pread64(fd, async->buffer, size, start);
1078                         if (ret < size) {
1079                                 free(async->buffer);
1080                                 free(async);
1081                                 error("unable to read superblock at %llu: %s",
1082                                                 (unsigned long long)start,
1083                                                 strerror(errno));
1084                                 return -errno;
1085                         }
1086                         size = 0;
1087                         ret = 0;
1088                 }
1089
1090                 while (!md->data && size > 0) {
1091                         u64 this_read = min((u64)md->root->fs_info->nodesize,
1092                                         size);
1093
1094                         eb = read_tree_block(md->root->fs_info, start, 0);
1095                         if (!extent_buffer_uptodate(eb)) {
1096                                 free(async->buffer);
1097                                 free(async);
1098                                 error("unable to read metadata block %llu",
1099                                         (unsigned long long)start);
1100                                 return -EIO;
1101                         }
1102                         copy_buffer(md, async->buffer + offset, eb);
1103                         free_extent_buffer(eb);
1104                         start += this_read;
1105                         offset += this_read;
1106                         size -= this_read;
1107                 }
1108
1109                 md->pending_start = (u64)-1;
1110                 md->pending_size = 0;
1111         } else if (!done) {
1112                 return 0;
1113         }
1114
1115         pthread_mutex_lock(&md->mutex);
1116         if (async) {
1117                 list_add_tail(&async->ordered, &md->ordered);
1118                 md->num_items++;
1119                 if (md->compress_level > 0) {
1120                         list_add_tail(&async->list, &md->list);
1121                         pthread_cond_signal(&md->cond);
1122                 } else {
1123                         md->num_ready++;
1124                 }
1125         }
1126         if (md->num_items >= ITEMS_PER_CLUSTER || done) {
1127                 ret = write_buffers(md, &start);
1128                 if (ret)
1129                         error("unable to write buffers: %s", strerror(-ret));
1130                 else
1131                         meta_cluster_init(md, start);
1132         }
1133         pthread_mutex_unlock(&md->mutex);
1134         return ret;
1135 }
1136
1137 static int add_extent(u64 start, u64 size, struct metadump_struct *md,
1138                       int data)
1139 {
1140         int ret;
1141         if (md->data != data ||
1142             md->pending_size + size > MAX_PENDING_SIZE ||
1143             md->pending_start + md->pending_size != start) {
1144                 ret = flush_pending(md, 0);
1145                 if (ret)
1146                         return ret;
1147                 md->pending_start = start;
1148         }
1149         readahead_tree_block(md->root->fs_info, start, 0);
1150         md->pending_size += size;
1151         md->data = data;
1152         return 0;
1153 }
1154
1155 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1156 static int is_tree_block(struct btrfs_root *extent_root,
1157                          struct btrfs_path *path, u64 bytenr)
1158 {
1159         struct extent_buffer *leaf;
1160         struct btrfs_key key;
1161         u64 ref_objectid;
1162         int ret;
1163
1164         leaf = path->nodes[0];
1165         while (1) {
1166                 struct btrfs_extent_ref_v0 *ref_item;
1167                 path->slots[0]++;
1168                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1169                         ret = btrfs_next_leaf(extent_root, path);
1170                         if (ret < 0)
1171                                 return ret;
1172                         if (ret > 0)
1173                                 break;
1174                         leaf = path->nodes[0];
1175                 }
1176                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1177                 if (key.objectid != bytenr)
1178                         break;
1179                 if (key.type != BTRFS_EXTENT_REF_V0_KEY)
1180                         continue;
1181                 ref_item = btrfs_item_ptr(leaf, path->slots[0],
1182                                           struct btrfs_extent_ref_v0);
1183                 ref_objectid = btrfs_ref_objectid_v0(leaf, ref_item);
1184                 if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID)
1185                         return 1;
1186                 break;
1187         }
1188         return 0;
1189 }
1190 #endif
1191
1192 static int copy_tree_blocks(struct btrfs_root *root, struct extent_buffer *eb,
1193                             struct metadump_struct *metadump, int root_tree)
1194 {
1195         struct extent_buffer *tmp;
1196         struct btrfs_root_item *ri;
1197         struct btrfs_key key;
1198         struct btrfs_fs_info *fs_info = root->fs_info;
1199         u64 bytenr;
1200         int level;
1201         int nritems = 0;
1202         int i = 0;
1203         int ret;
1204
1205         ret = add_extent(btrfs_header_bytenr(eb), fs_info->nodesize,
1206                          metadump, 0);
1207         if (ret) {
1208                 error("unable to add metadata block %llu: %d",
1209                                 btrfs_header_bytenr(eb), ret);
1210                 return ret;
1211         }
1212
1213         if (btrfs_header_level(eb) == 0 && !root_tree)
1214                 return 0;
1215
1216         level = btrfs_header_level(eb);
1217         nritems = btrfs_header_nritems(eb);
1218         for (i = 0; i < nritems; i++) {
1219                 if (level == 0) {
1220                         btrfs_item_key_to_cpu(eb, &key, i);
1221                         if (key.type != BTRFS_ROOT_ITEM_KEY)
1222                                 continue;
1223                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
1224                         bytenr = btrfs_disk_root_bytenr(eb, ri);
1225                         tmp = read_tree_block(fs_info, bytenr, 0);
1226                         if (!extent_buffer_uptodate(tmp)) {
1227                                 error("unable to read log root block");
1228                                 return -EIO;
1229                         }
1230                         ret = copy_tree_blocks(root, tmp, metadump, 0);
1231                         free_extent_buffer(tmp);
1232                         if (ret)
1233                                 return ret;
1234                 } else {
1235                         bytenr = btrfs_node_blockptr(eb, i);
1236                         tmp = read_tree_block(fs_info, bytenr, 0);
1237                         if (!extent_buffer_uptodate(tmp)) {
1238                                 error("unable to read log root block");
1239                                 return -EIO;
1240                         }
1241                         ret = copy_tree_blocks(root, tmp, metadump, root_tree);
1242                         free_extent_buffer(tmp);
1243                         if (ret)
1244                                 return ret;
1245                 }
1246         }
1247
1248         return 0;
1249 }
1250
1251 static int copy_log_trees(struct btrfs_root *root,
1252                           struct metadump_struct *metadump)
1253 {
1254         u64 blocknr = btrfs_super_log_root(root->fs_info->super_copy);
1255
1256         if (blocknr == 0)
1257                 return 0;
1258
1259         if (!root->fs_info->log_root_tree ||
1260             !root->fs_info->log_root_tree->node) {
1261                 error("unable to copy tree log, it has not been setup");
1262                 return -EIO;
1263         }
1264
1265         return copy_tree_blocks(root, root->fs_info->log_root_tree->node,
1266                                 metadump, 1);
1267 }
1268
1269 static int copy_space_cache(struct btrfs_root *root,
1270                             struct metadump_struct *metadump,
1271                             struct btrfs_path *path)
1272 {
1273         struct extent_buffer *leaf;
1274         struct btrfs_file_extent_item *fi;
1275         struct btrfs_key key;
1276         u64 bytenr, num_bytes;
1277         int ret;
1278
1279         root = root->fs_info->tree_root;
1280
1281         key.objectid = 0;
1282         key.type = BTRFS_EXTENT_DATA_KEY;
1283         key.offset = 0;
1284
1285         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1286         if (ret < 0) {
1287                 error("free space inode not found: %d", ret);
1288                 return ret;
1289         }
1290
1291         leaf = path->nodes[0];
1292
1293         while (1) {
1294                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1295                         ret = btrfs_next_leaf(root, path);
1296                         if (ret < 0) {
1297                                 error("cannot go to next leaf %d", ret);
1298                                 return ret;
1299                         }
1300                         if (ret > 0)
1301                                 break;
1302                         leaf = path->nodes[0];
1303                 }
1304
1305                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1306                 if (key.type != BTRFS_EXTENT_DATA_KEY) {
1307                         path->slots[0]++;
1308                         continue;
1309                 }
1310
1311                 fi = btrfs_item_ptr(leaf, path->slots[0],
1312                                     struct btrfs_file_extent_item);
1313                 if (btrfs_file_extent_type(leaf, fi) !=
1314                     BTRFS_FILE_EXTENT_REG) {
1315                         path->slots[0]++;
1316                         continue;
1317                 }
1318
1319                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1320                 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
1321                 ret = add_extent(bytenr, num_bytes, metadump, 1);
1322                 if (ret) {
1323                         error("unable to add space cache blocks %d", ret);
1324                         btrfs_release_path(path);
1325                         return ret;
1326                 }
1327                 path->slots[0]++;
1328         }
1329
1330         return 0;
1331 }
1332
1333 static int copy_from_extent_tree(struct metadump_struct *metadump,
1334                                  struct btrfs_path *path)
1335 {
1336         struct btrfs_root *extent_root;
1337         struct extent_buffer *leaf;
1338         struct btrfs_extent_item *ei;
1339         struct btrfs_key key;
1340         u64 bytenr;
1341         u64 num_bytes;
1342         int ret;
1343
1344         extent_root = metadump->root->fs_info->extent_root;
1345         bytenr = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
1346         key.objectid = bytenr;
1347         key.type = BTRFS_EXTENT_ITEM_KEY;
1348         key.offset = 0;
1349
1350         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1351         if (ret < 0) {
1352                 error("extent root not found: %d", ret);
1353                 return ret;
1354         }
1355         ret = 0;
1356
1357         leaf = path->nodes[0];
1358
1359         while (1) {
1360                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1361                         ret = btrfs_next_leaf(extent_root, path);
1362                         if (ret < 0) {
1363                                 error("cannot go to next leaf %d", ret);
1364                                 break;
1365                         }
1366                         if (ret > 0) {
1367                                 ret = 0;
1368                                 break;
1369                         }
1370                         leaf = path->nodes[0];
1371                 }
1372
1373                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1374                 if (key.objectid < bytenr ||
1375                     (key.type != BTRFS_EXTENT_ITEM_KEY &&
1376                      key.type != BTRFS_METADATA_ITEM_KEY)) {
1377                         path->slots[0]++;
1378                         continue;
1379                 }
1380
1381                 bytenr = key.objectid;
1382                 if (key.type == BTRFS_METADATA_ITEM_KEY) {
1383                         num_bytes = extent_root->fs_info->nodesize;
1384                 } else {
1385                         num_bytes = key.offset;
1386                 }
1387
1388                 if (num_bytes == 0) {
1389                         error("extent length 0 at bytenr %llu key type %d",
1390                                         (unsigned long long)bytenr, key.type);
1391                         ret = -EIO;
1392                         break;
1393                 }
1394
1395                 if (btrfs_item_size_nr(leaf, path->slots[0]) > sizeof(*ei)) {
1396                         ei = btrfs_item_ptr(leaf, path->slots[0],
1397                                             struct btrfs_extent_item);
1398                         if (btrfs_extent_flags(leaf, ei) &
1399                             BTRFS_EXTENT_FLAG_TREE_BLOCK) {
1400                                 ret = add_extent(bytenr, num_bytes, metadump,
1401                                                  0);
1402                                 if (ret) {
1403                                         error("unable to add block %llu: %d",
1404                                                 (unsigned long long)bytenr, ret);
1405                                         break;
1406                                 }
1407                         }
1408                 } else {
1409 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1410                         ret = is_tree_block(extent_root, path, bytenr);
1411                         if (ret < 0) {
1412                                 error("failed to check tree block %llu: %d",
1413                                         (unsigned long long)bytenr, ret);
1414                                 break;
1415                         }
1416
1417                         if (ret) {
1418                                 ret = add_extent(bytenr, num_bytes, metadump,
1419                                                  0);
1420                                 if (ret) {
1421                                         error("unable to add block %llu: %d",
1422                                                 (unsigned long long)bytenr, ret);
1423                                         break;
1424                                 }
1425                         }
1426                         ret = 0;
1427 #else
1428                         error(
1429         "either extent tree is corrupted or you haven't built with V0 support");
1430                         ret = -EIO;
1431                         break;
1432 #endif
1433                 }
1434                 bytenr += num_bytes;
1435         }
1436
1437         btrfs_release_path(path);
1438
1439         return ret;
1440 }
1441
1442 static int create_metadump(const char *input, FILE *out, int num_threads,
1443                            int compress_level, int sanitize, int walk_trees)
1444 {
1445         struct btrfs_root *root;
1446         struct btrfs_path path;
1447         struct metadump_struct metadump;
1448         int ret;
1449         int err = 0;
1450
1451         root = open_ctree(input, 0, 0);
1452         if (!root) {
1453                 error("open ctree failed");
1454                 return -EIO;
1455         }
1456
1457         ret = metadump_init(&metadump, root, out, num_threads,
1458                             compress_level, sanitize);
1459         if (ret) {
1460                 error("failed to initialize metadump: %d", ret);
1461                 close_ctree(root);
1462                 return ret;
1463         }
1464
1465         ret = add_extent(BTRFS_SUPER_INFO_OFFSET, BTRFS_SUPER_INFO_SIZE,
1466                         &metadump, 0);
1467         if (ret) {
1468                 error("unable to add metadata: %d", ret);
1469                 err = ret;
1470                 goto out;
1471         }
1472
1473         btrfs_init_path(&path);
1474
1475         if (walk_trees) {
1476                 ret = copy_tree_blocks(root, root->fs_info->chunk_root->node,
1477                                        &metadump, 1);
1478                 if (ret) {
1479                         err = ret;
1480                         goto out;
1481                 }
1482
1483                 ret = copy_tree_blocks(root, root->fs_info->tree_root->node,
1484                                        &metadump, 1);
1485                 if (ret) {
1486                         err = ret;
1487                         goto out;
1488                 }
1489         } else {
1490                 ret = copy_from_extent_tree(&metadump, &path);
1491                 if (ret) {
1492                         err = ret;
1493                         goto out;
1494                 }
1495         }
1496
1497         ret = copy_log_trees(root, &metadump);
1498         if (ret) {
1499                 err = ret;
1500                 goto out;
1501         }
1502
1503         ret = copy_space_cache(root, &metadump, &path);
1504 out:
1505         ret = flush_pending(&metadump, 1);
1506         if (ret) {
1507                 if (!err)
1508                         err = ret;
1509                 error("failed to flush pending data: %d", ret);
1510         }
1511
1512         metadump_destroy(&metadump, num_threads);
1513
1514         btrfs_release_path(&path);
1515         ret = close_ctree(root);
1516         return err ? err : ret;
1517 }
1518
1519 static void update_super_old(u8 *buffer)
1520 {
1521         struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
1522         struct btrfs_chunk *chunk;
1523         struct btrfs_disk_key *key;
1524         u32 sectorsize = btrfs_super_sectorsize(super);
1525         u64 flags = btrfs_super_flags(super);
1526
1527         flags |= BTRFS_SUPER_FLAG_METADUMP;
1528         btrfs_set_super_flags(super, flags);
1529
1530         key = (struct btrfs_disk_key *)(super->sys_chunk_array);
1531         chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
1532                                        sizeof(struct btrfs_disk_key));
1533
1534         btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
1535         btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
1536         btrfs_set_disk_key_offset(key, 0);
1537
1538         btrfs_set_stack_chunk_length(chunk, (u64)-1);
1539         btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
1540         btrfs_set_stack_chunk_stripe_len(chunk, BTRFS_STRIPE_LEN);
1541         btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
1542         btrfs_set_stack_chunk_io_align(chunk, sectorsize);
1543         btrfs_set_stack_chunk_io_width(chunk, sectorsize);
1544         btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
1545         btrfs_set_stack_chunk_num_stripes(chunk, 1);
1546         btrfs_set_stack_chunk_sub_stripes(chunk, 0);
1547         chunk->stripe.devid = super->dev_item.devid;
1548         btrfs_set_stack_stripe_offset(&chunk->stripe, 0);
1549         memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
1550         btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
1551         csum_block(buffer, BTRFS_SUPER_INFO_SIZE);
1552 }
1553
1554 static int update_super(struct mdrestore_struct *mdres, u8 *buffer)
1555 {
1556         struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
1557         struct btrfs_chunk *chunk;
1558         struct btrfs_disk_key *disk_key;
1559         struct btrfs_key key;
1560         u64 flags = btrfs_super_flags(super);
1561         u32 new_array_size = 0;
1562         u32 array_size;
1563         u32 cur = 0;
1564         u8 *ptr, *write_ptr;
1565         int old_num_stripes;
1566
1567         write_ptr = ptr = super->sys_chunk_array;
1568         array_size = btrfs_super_sys_array_size(super);
1569
1570         while (cur < array_size) {
1571                 disk_key = (struct btrfs_disk_key *)ptr;
1572                 btrfs_disk_key_to_cpu(&key, disk_key);
1573
1574                 new_array_size += sizeof(*disk_key);
1575                 memmove(write_ptr, ptr, sizeof(*disk_key));
1576
1577                 write_ptr += sizeof(*disk_key);
1578                 ptr += sizeof(*disk_key);
1579                 cur += sizeof(*disk_key);
1580
1581                 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
1582                         u64 type, physical, physical_dup, size = 0;
1583
1584                         chunk = (struct btrfs_chunk *)ptr;
1585                         old_num_stripes = btrfs_stack_chunk_num_stripes(chunk);
1586                         chunk = (struct btrfs_chunk *)write_ptr;
1587
1588                         memmove(write_ptr, ptr, sizeof(*chunk));
1589                         btrfs_set_stack_chunk_sub_stripes(chunk, 0);
1590                         type = btrfs_stack_chunk_type(chunk);
1591                         if (type & BTRFS_BLOCK_GROUP_DUP) {
1592                                 new_array_size += sizeof(struct btrfs_stripe);
1593                                 write_ptr += sizeof(struct btrfs_stripe);
1594                         } else {
1595                                 btrfs_set_stack_chunk_num_stripes(chunk, 1);
1596                                 btrfs_set_stack_chunk_type(chunk,
1597                                                 BTRFS_BLOCK_GROUP_SYSTEM);
1598                         }
1599                         chunk->stripe.devid = super->dev_item.devid;
1600                         physical = logical_to_physical(mdres, key.offset,
1601                                                        &size, &physical_dup);
1602                         if (size != (u64)-1)
1603                                 btrfs_set_stack_stripe_offset(&chunk->stripe,
1604                                                               physical);
1605                         memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid,
1606                                BTRFS_UUID_SIZE);
1607                         new_array_size += sizeof(*chunk);
1608                 } else {
1609                         error("bogus key in the sys array %d", key.type);
1610                         return -EIO;
1611                 }
1612                 write_ptr += sizeof(*chunk);
1613                 ptr += btrfs_chunk_item_size(old_num_stripes);
1614                 cur += btrfs_chunk_item_size(old_num_stripes);
1615         }
1616
1617         if (mdres->clear_space_cache)
1618                 btrfs_set_super_cache_generation(super, 0);
1619
1620         flags |= BTRFS_SUPER_FLAG_METADUMP_V2;
1621         btrfs_set_super_flags(super, flags);
1622         btrfs_set_super_sys_array_size(super, new_array_size);
1623         btrfs_set_super_num_devices(super, 1);
1624         csum_block(buffer, BTRFS_SUPER_INFO_SIZE);
1625
1626         return 0;
1627 }
1628
1629 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size)
1630 {
1631         struct extent_buffer *eb;
1632
1633         eb = calloc(1, sizeof(struct extent_buffer) + size);
1634         if (!eb)
1635                 return NULL;
1636
1637         eb->start = bytenr;
1638         eb->len = size;
1639         return eb;
1640 }
1641
1642 static void truncate_item(struct extent_buffer *eb, int slot, u32 new_size)
1643 {
1644         struct btrfs_item *item;
1645         u32 nritems;
1646         u32 old_size;
1647         u32 old_data_start;
1648         u32 size_diff;
1649         u32 data_end;
1650         int i;
1651
1652         old_size = btrfs_item_size_nr(eb, slot);
1653         if (old_size == new_size)
1654                 return;
1655
1656         nritems = btrfs_header_nritems(eb);
1657         data_end = btrfs_item_offset_nr(eb, nritems - 1);
1658
1659         old_data_start = btrfs_item_offset_nr(eb, slot);
1660         size_diff = old_size - new_size;
1661
1662         for (i = slot; i < nritems; i++) {
1663                 u32 ioff;
1664                 item = btrfs_item_nr(i);
1665                 ioff = btrfs_item_offset(eb, item);
1666                 btrfs_set_item_offset(eb, item, ioff + size_diff);
1667         }
1668
1669         memmove_extent_buffer(eb, btrfs_leaf_data(eb) + data_end + size_diff,
1670                               btrfs_leaf_data(eb) + data_end,
1671                               old_data_start + new_size - data_end);
1672         item = btrfs_item_nr(slot);
1673         btrfs_set_item_size(eb, item, new_size);
1674 }
1675
1676 static int fixup_chunk_tree_block(struct mdrestore_struct *mdres,
1677                                   struct async_work *async, u8 *buffer,
1678                                   size_t size)
1679 {
1680         struct extent_buffer *eb;
1681         size_t size_left = size;
1682         u64 bytenr = async->start;
1683         int i;
1684
1685         if (size_left % mdres->nodesize)
1686                 return 0;
1687
1688         eb = alloc_dummy_eb(bytenr, mdres->nodesize);
1689         if (!eb)
1690                 return -ENOMEM;
1691
1692         while (size_left) {
1693                 eb->start = bytenr;
1694                 memcpy(eb->data, buffer, mdres->nodesize);
1695
1696                 if (btrfs_header_bytenr(eb) != bytenr)
1697                         break;
1698                 if (memcmp(mdres->fsid,
1699                            eb->data + offsetof(struct btrfs_header, fsid),
1700                            BTRFS_FSID_SIZE))
1701                         break;
1702
1703                 if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID)
1704                         goto next;
1705
1706                 if (btrfs_header_level(eb) != 0)
1707                         goto next;
1708
1709                 for (i = 0; i < btrfs_header_nritems(eb); i++) {
1710                         struct btrfs_chunk *chunk;
1711                         struct btrfs_key key;
1712                         u64 type, physical, physical_dup, size = (u64)-1;
1713
1714                         btrfs_item_key_to_cpu(eb, &key, i);
1715                         if (key.type != BTRFS_CHUNK_ITEM_KEY)
1716                                 continue;
1717
1718                         size = 0;
1719                         physical = logical_to_physical(mdres, key.offset,
1720                                                        &size, &physical_dup);
1721
1722                         if (!physical_dup)
1723                                 truncate_item(eb, i, sizeof(*chunk));
1724                         chunk = btrfs_item_ptr(eb, i, struct btrfs_chunk);
1725
1726
1727                         /* Zero out the RAID profile */
1728                         type = btrfs_chunk_type(eb, chunk);
1729                         type &= (BTRFS_BLOCK_GROUP_DATA |
1730                                  BTRFS_BLOCK_GROUP_SYSTEM |
1731                                  BTRFS_BLOCK_GROUP_METADATA |
1732                                  BTRFS_BLOCK_GROUP_DUP);
1733                         btrfs_set_chunk_type(eb, chunk, type);
1734
1735                         if (!physical_dup)
1736                                 btrfs_set_chunk_num_stripes(eb, chunk, 1);
1737                         btrfs_set_chunk_sub_stripes(eb, chunk, 0);
1738                         btrfs_set_stripe_devid_nr(eb, chunk, 0, mdres->devid);
1739                         if (size != (u64)-1)
1740                                 btrfs_set_stripe_offset_nr(eb, chunk, 0,
1741                                                            physical);
1742                         /* update stripe 2 offset */
1743                         if (physical_dup)
1744                                 btrfs_set_stripe_offset_nr(eb, chunk, 1,
1745                                                            physical_dup);
1746
1747                         write_extent_buffer(eb, mdres->uuid,
1748                                         (unsigned long)btrfs_stripe_dev_uuid_nr(
1749                                                 chunk, 0),
1750                                         BTRFS_UUID_SIZE);
1751                 }
1752                 memcpy(buffer, eb->data, eb->len);
1753                 csum_block(buffer, eb->len);
1754 next:
1755                 size_left -= mdres->nodesize;
1756                 buffer += mdres->nodesize;
1757                 bytenr += mdres->nodesize;
1758         }
1759
1760         free(eb);
1761         return 0;
1762 }
1763
1764 static void write_backup_supers(int fd, u8 *buf)
1765 {
1766         struct btrfs_super_block *super = (struct btrfs_super_block *)buf;
1767         struct stat st;
1768         u64 size;
1769         u64 bytenr;
1770         int i;
1771         int ret;
1772
1773         if (fstat(fd, &st)) {
1774                 error(
1775         "cannot stat restore point, won't be able to write backup supers: %s",
1776                         strerror(errno));
1777                 return;
1778         }
1779
1780         size = btrfs_device_size(fd, &st);
1781
1782         for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1783                 bytenr = btrfs_sb_offset(i);
1784                 if (bytenr + BTRFS_SUPER_INFO_SIZE > size)
1785                         break;
1786                 btrfs_set_super_bytenr(super, bytenr);
1787                 csum_block(buf, BTRFS_SUPER_INFO_SIZE);
1788                 ret = pwrite64(fd, buf, BTRFS_SUPER_INFO_SIZE, bytenr);
1789                 if (ret < BTRFS_SUPER_INFO_SIZE) {
1790                         if (ret < 0)
1791                                 error(
1792                                 "problem writing out backup super block %d: %s",
1793                                                 i, strerror(errno));
1794                         else
1795                                 error("short write writing out backup super block");
1796                         break;
1797                 }
1798         }
1799 }
1800
1801 static void *restore_worker(void *data)
1802 {
1803         struct mdrestore_struct *mdres = (struct mdrestore_struct *)data;
1804         struct async_work *async;
1805         size_t size;
1806         u8 *buffer;
1807         u8 *outbuf;
1808         int outfd;
1809         int ret;
1810         int compress_size = MAX_PENDING_SIZE * 4;
1811
1812         outfd = fileno(mdres->out);
1813         buffer = malloc(compress_size);
1814         if (!buffer) {
1815                 error("not enough memory for restore worker buffer");
1816                 pthread_mutex_lock(&mdres->mutex);
1817                 if (!mdres->error)
1818                         mdres->error = -ENOMEM;
1819                 pthread_mutex_unlock(&mdres->mutex);
1820                 pthread_exit(NULL);
1821         }
1822
1823         while (1) {
1824                 u64 bytenr, physical_dup;
1825                 off_t offset = 0;
1826                 int err = 0;
1827
1828                 pthread_mutex_lock(&mdres->mutex);
1829                 while (!mdres->nodesize || list_empty(&mdres->list)) {
1830                         if (mdres->done) {
1831                                 pthread_mutex_unlock(&mdres->mutex);
1832                                 goto out;
1833                         }
1834                         pthread_cond_wait(&mdres->cond, &mdres->mutex);
1835                 }
1836                 async = list_entry(mdres->list.next, struct async_work, list);
1837                 list_del_init(&async->list);
1838
1839                 if (mdres->compress_method == COMPRESS_ZLIB) {
1840                         size = compress_size; 
1841                         pthread_mutex_unlock(&mdres->mutex);
1842                         ret = uncompress(buffer, (unsigned long *)&size,
1843                                          async->buffer, async->bufsize);
1844                         pthread_mutex_lock(&mdres->mutex);
1845                         if (ret != Z_OK) {
1846                                 error("decompression failed with %d", ret);
1847                                 err = -EIO;
1848                         }
1849                         outbuf = buffer;
1850                 } else {
1851                         outbuf = async->buffer;
1852                         size = async->bufsize;
1853                 }
1854
1855                 if (!mdres->multi_devices) {
1856                         if (async->start == BTRFS_SUPER_INFO_OFFSET) {
1857                                 if (mdres->old_restore) {
1858                                         update_super_old(outbuf);
1859                                 } else {
1860                                         ret = update_super(mdres, outbuf);
1861                                         if (ret)
1862                                                 err = ret;
1863                                 }
1864                         } else if (!mdres->old_restore) {
1865                                 ret = fixup_chunk_tree_block(mdres, async, outbuf, size);
1866                                 if (ret)
1867                                         err = ret;
1868                         }
1869                 }
1870
1871                 if (!mdres->fixup_offset) {
1872                         while (size) {
1873                                 u64 chunk_size = size;
1874                                 physical_dup = 0;
1875                                 if (!mdres->multi_devices && !mdres->old_restore)
1876                                         bytenr = logical_to_physical(mdres,
1877                                                      async->start + offset,
1878                                                      &chunk_size,
1879                                                      &physical_dup);
1880                                 else
1881                                         bytenr = async->start + offset;
1882
1883                                 ret = pwrite64(outfd, outbuf+offset, chunk_size,
1884                                                bytenr);
1885                                 if (ret != chunk_size)
1886                                         goto error;
1887
1888                                 if (physical_dup)
1889                                         ret = pwrite64(outfd, outbuf+offset,
1890                                                        chunk_size,
1891                                                        physical_dup);
1892                                 if (ret != chunk_size)
1893                                         goto error;
1894
1895                                 size -= chunk_size;
1896                                 offset += chunk_size;
1897                                 continue;
1898
1899 error:
1900                                 if (ret < 0) {
1901                                         error("unable to write to device: %s",
1902                                                         strerror(errno));
1903                                         err = errno;
1904                                 } else {
1905                                         error("short write");
1906                                         err = -EIO;
1907                                 }
1908                         }
1909                 } else if (async->start != BTRFS_SUPER_INFO_OFFSET) {
1910                         ret = write_data_to_disk(mdres->info, outbuf, async->start, size, 0);
1911                         if (ret) {
1912                                 error("failed to write data");
1913                                 exit(1);
1914                         }
1915                 }
1916
1917
1918                 /* backup super blocks are already there at fixup_offset stage */
1919                 if (!mdres->multi_devices && async->start == BTRFS_SUPER_INFO_OFFSET)
1920                         write_backup_supers(outfd, outbuf);
1921
1922                 if (err && !mdres->error)
1923                         mdres->error = err;
1924                 mdres->num_items--;
1925                 pthread_mutex_unlock(&mdres->mutex);
1926
1927                 free(async->buffer);
1928                 free(async);
1929         }
1930 out:
1931         free(buffer);
1932         pthread_exit(NULL);
1933 }
1934
1935 static void mdrestore_destroy(struct mdrestore_struct *mdres, int num_threads)
1936 {
1937         struct rb_node *n;
1938         int i;
1939
1940         while ((n = rb_first(&mdres->chunk_tree))) {
1941                 struct fs_chunk *entry;
1942
1943                 entry = rb_entry(n, struct fs_chunk, l);
1944                 rb_erase(n, &mdres->chunk_tree);
1945                 rb_erase(&entry->p, &mdres->physical_tree);
1946                 free(entry);
1947         }
1948         pthread_mutex_lock(&mdres->mutex);
1949         mdres->done = 1;
1950         pthread_cond_broadcast(&mdres->cond);
1951         pthread_mutex_unlock(&mdres->mutex);
1952
1953         for (i = 0; i < num_threads; i++)
1954                 pthread_join(mdres->threads[i], NULL);
1955
1956         pthread_cond_destroy(&mdres->cond);
1957         pthread_mutex_destroy(&mdres->mutex);
1958 }
1959
1960 static int mdrestore_init(struct mdrestore_struct *mdres,
1961                           FILE *in, FILE *out, int old_restore,
1962                           int num_threads, int fixup_offset,
1963                           struct btrfs_fs_info *info, int multi_devices)
1964 {
1965         int i, ret = 0;
1966
1967         memset(mdres, 0, sizeof(*mdres));
1968         pthread_cond_init(&mdres->cond, NULL);
1969         pthread_mutex_init(&mdres->mutex, NULL);
1970         INIT_LIST_HEAD(&mdres->list);
1971         INIT_LIST_HEAD(&mdres->overlapping_chunks);
1972         mdres->in = in;
1973         mdres->out = out;
1974         mdres->old_restore = old_restore;
1975         mdres->chunk_tree.rb_node = NULL;
1976         mdres->fixup_offset = fixup_offset;
1977         mdres->info = info;
1978         mdres->multi_devices = multi_devices;
1979         mdres->clear_space_cache = 0;
1980         mdres->last_physical_offset = 0;
1981         mdres->alloced_chunks = 0;
1982
1983         if (!num_threads)
1984                 return 0;
1985
1986         mdres->num_threads = num_threads;
1987         for (i = 0; i < num_threads; i++) {
1988                 ret = pthread_create(&mdres->threads[i], NULL, restore_worker,
1989                                      mdres);
1990                 if (ret) {
1991                         /* pthread_create returns errno directly */
1992                         ret = -ret;
1993                         break;
1994                 }
1995         }
1996         if (ret)
1997                 mdrestore_destroy(mdres, i + 1);
1998         return ret;
1999 }
2000
2001 static int fill_mdres_info(struct mdrestore_struct *mdres,
2002                            struct async_work *async)
2003 {
2004         struct btrfs_super_block *super;
2005         u8 *buffer = NULL;
2006         u8 *outbuf;
2007         int ret;
2008
2009         /* We've already been initialized */
2010         if (mdres->nodesize)
2011                 return 0;
2012
2013         if (mdres->compress_method == COMPRESS_ZLIB) {
2014                 size_t size = MAX_PENDING_SIZE * 2;
2015
2016                 buffer = malloc(MAX_PENDING_SIZE * 2);
2017                 if (!buffer)
2018                         return -ENOMEM;
2019                 ret = uncompress(buffer, (unsigned long *)&size,
2020                                  async->buffer, async->bufsize);
2021                 if (ret != Z_OK) {
2022                         error("decompression failed with %d", ret);
2023                         free(buffer);
2024                         return -EIO;
2025                 }
2026                 outbuf = buffer;
2027         } else {
2028                 outbuf = async->buffer;
2029         }
2030
2031         super = (struct btrfs_super_block *)outbuf;
2032         mdres->nodesize = btrfs_super_nodesize(super);
2033         memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
2034         memcpy(mdres->uuid, super->dev_item.uuid,
2035                        BTRFS_UUID_SIZE);
2036         mdres->devid = le64_to_cpu(super->dev_item.devid);
2037         free(buffer);
2038         return 0;
2039 }
2040
2041 static int add_cluster(struct meta_cluster *cluster,
2042                        struct mdrestore_struct *mdres, u64 *next)
2043 {
2044         struct meta_cluster_item *item;
2045         struct meta_cluster_header *header = &cluster->header;
2046         struct async_work *async;
2047         u64 bytenr;
2048         u32 i, nritems;
2049         int ret;
2050
2051         pthread_mutex_lock(&mdres->mutex);
2052         mdres->compress_method = header->compress;
2053         pthread_mutex_unlock(&mdres->mutex);
2054
2055         bytenr = le64_to_cpu(header->bytenr) + BLOCK_SIZE;
2056         nritems = le32_to_cpu(header->nritems);
2057         for (i = 0; i < nritems; i++) {
2058                 item = &cluster->items[i];
2059                 async = calloc(1, sizeof(*async));
2060                 if (!async) {
2061                         error("not enough memory for async data");
2062                         return -ENOMEM;
2063                 }
2064                 async->start = le64_to_cpu(item->bytenr);
2065                 async->bufsize = le32_to_cpu(item->size);
2066                 async->buffer = malloc(async->bufsize);
2067                 if (!async->buffer) {
2068                         error("not enough memory for async buffer");
2069                         free(async);
2070                         return -ENOMEM;
2071                 }
2072                 ret = fread(async->buffer, async->bufsize, 1, mdres->in);
2073                 if (ret != 1) {
2074                         error("unable to read buffer: %s", strerror(errno));
2075                         free(async->buffer);
2076                         free(async);
2077                         return -EIO;
2078                 }
2079                 bytenr += async->bufsize;
2080
2081                 pthread_mutex_lock(&mdres->mutex);
2082                 if (async->start == BTRFS_SUPER_INFO_OFFSET) {
2083                         ret = fill_mdres_info(mdres, async);
2084                         if (ret) {
2085                                 error("unable to set up restore state");
2086                                 pthread_mutex_unlock(&mdres->mutex);
2087                                 free(async->buffer);
2088                                 free(async);
2089                                 return ret;
2090                         }
2091                 }
2092                 list_add_tail(&async->list, &mdres->list);
2093                 mdres->num_items++;
2094                 pthread_cond_signal(&mdres->cond);
2095                 pthread_mutex_unlock(&mdres->mutex);
2096         }
2097         if (bytenr & BLOCK_MASK) {
2098                 char buffer[BLOCK_MASK];
2099                 size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);
2100
2101                 bytenr += size;
2102                 ret = fread(buffer, size, 1, mdres->in);
2103                 if (ret != 1) {
2104                         error("failed to read buffer: %s", strerror(errno));
2105                         return -EIO;
2106                 }
2107         }
2108         *next = bytenr;
2109         return 0;
2110 }
2111
2112 static int wait_for_worker(struct mdrestore_struct *mdres)
2113 {
2114         int ret = 0;
2115
2116         pthread_mutex_lock(&mdres->mutex);
2117         ret = mdres->error;
2118         while (!ret && mdres->num_items > 0) {
2119                 struct timespec ts = {
2120                         .tv_sec = 0,
2121                         .tv_nsec = 10000000,
2122                 };
2123                 pthread_mutex_unlock(&mdres->mutex);
2124                 nanosleep(&ts, NULL);
2125                 pthread_mutex_lock(&mdres->mutex);
2126                 ret = mdres->error;
2127         }
2128         pthread_mutex_unlock(&mdres->mutex);
2129         return ret;
2130 }
2131
2132 static int read_chunk_block(struct mdrestore_struct *mdres, u8 *buffer,
2133                             u64 bytenr, u64 item_bytenr, u32 bufsize,
2134                             u64 cluster_bytenr)
2135 {
2136         struct extent_buffer *eb;
2137         int ret = 0;
2138         int i;
2139
2140         eb = alloc_dummy_eb(bytenr, mdres->nodesize);
2141         if (!eb) {
2142                 ret = -ENOMEM;
2143                 goto out;
2144         }
2145
2146         while (item_bytenr != bytenr) {
2147                 buffer += mdres->nodesize;
2148                 item_bytenr += mdres->nodesize;
2149         }
2150
2151         memcpy(eb->data, buffer, mdres->nodesize);
2152         if (btrfs_header_bytenr(eb) != bytenr) {
2153                 error("eb bytenr does not match found bytenr: %llu != %llu",
2154                                 (unsigned long long)btrfs_header_bytenr(eb),
2155                                 (unsigned long long)bytenr);
2156                 ret = -EIO;
2157                 goto out;
2158         }
2159
2160         if (memcmp(mdres->fsid, eb->data + offsetof(struct btrfs_header, fsid),
2161                    BTRFS_FSID_SIZE)) {
2162                 error("filesystem UUID of eb %llu does not match",
2163                                 (unsigned long long)bytenr);
2164                 ret = -EIO;
2165                 goto out;
2166         }
2167
2168         if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID) {
2169                 error("wrong eb %llu owner %llu",
2170                                 (unsigned long long)bytenr,
2171                                 (unsigned long long)btrfs_header_owner(eb));
2172                 ret = -EIO;
2173                 goto out;
2174         }
2175
2176         for (i = 0; i < btrfs_header_nritems(eb); i++) {
2177                 struct btrfs_chunk *chunk;
2178                 struct fs_chunk *fs_chunk;
2179                 struct btrfs_key key;
2180                 u64 type;
2181
2182                 if (btrfs_header_level(eb)) {
2183                         u64 blockptr = btrfs_node_blockptr(eb, i);
2184
2185                         ret = search_for_chunk_blocks(mdres, blockptr,
2186                                                       cluster_bytenr);
2187                         if (ret)
2188                                 break;
2189                         continue;
2190                 }
2191
2192                 /* Yay a leaf!  We loves leafs! */
2193                 btrfs_item_key_to_cpu(eb, &key, i);
2194                 if (key.type != BTRFS_CHUNK_ITEM_KEY)
2195                         continue;
2196
2197                 fs_chunk = malloc(sizeof(struct fs_chunk));
2198                 if (!fs_chunk) {
2199                         error("not enough memory to allocate chunk");
2200                         ret = -ENOMEM;
2201                         break;
2202                 }
2203                 memset(fs_chunk, 0, sizeof(*fs_chunk));
2204                 chunk = btrfs_item_ptr(eb, i, struct btrfs_chunk);
2205
2206                 fs_chunk->logical = key.offset;
2207                 fs_chunk->physical = btrfs_stripe_offset_nr(eb, chunk, 0);
2208                 fs_chunk->bytes = btrfs_chunk_length(eb, chunk);
2209                 INIT_LIST_HEAD(&fs_chunk->list);
2210                 if (tree_search(&mdres->physical_tree, &fs_chunk->p,
2211                                 physical_cmp, 1) != NULL)
2212                         list_add(&fs_chunk->list, &mdres->overlapping_chunks);
2213                 else
2214                         tree_insert(&mdres->physical_tree, &fs_chunk->p,
2215                                     physical_cmp);
2216
2217                 type = btrfs_chunk_type(eb, chunk);
2218                 if (type & BTRFS_BLOCK_GROUP_DUP) {
2219                         fs_chunk->physical_dup =
2220                                         btrfs_stripe_offset_nr(eb, chunk, 1);
2221                 }
2222
2223                 if (fs_chunk->physical_dup + fs_chunk->bytes >
2224                     mdres->last_physical_offset)
2225                         mdres->last_physical_offset = fs_chunk->physical_dup +
2226                                 fs_chunk->bytes;
2227                 else if (fs_chunk->physical + fs_chunk->bytes >
2228                     mdres->last_physical_offset)
2229                         mdres->last_physical_offset = fs_chunk->physical +
2230                                 fs_chunk->bytes;
2231                 mdres->alloced_chunks += fs_chunk->bytes;
2232                 /* in dup case, fs_chunk->bytes should add twice */
2233                 if (fs_chunk->physical_dup)
2234                         mdres->alloced_chunks += fs_chunk->bytes;
2235                 tree_insert(&mdres->chunk_tree, &fs_chunk->l, chunk_cmp);
2236         }
2237 out:
2238         free(eb);
2239         return ret;
2240 }
2241
2242 /* If you have to ask you aren't worthy */
2243 static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
2244                                    u64 search, u64 cluster_bytenr)
2245 {
2246         struct meta_cluster *cluster;
2247         struct meta_cluster_header *header;
2248         struct meta_cluster_item *item;
2249         u64 current_cluster = cluster_bytenr, bytenr;
2250         u64 item_bytenr;
2251         u32 bufsize, nritems, i;
2252         u32 max_size = MAX_PENDING_SIZE * 2;
2253         u8 *buffer, *tmp = NULL;
2254         int ret = 0;
2255
2256         cluster = malloc(BLOCK_SIZE);
2257         if (!cluster) {
2258                 error("not enough memory for cluster");
2259                 return -ENOMEM;
2260         }
2261
2262         buffer = malloc(max_size);
2263         if (!buffer) {
2264                 error("not enough memory for buffer");
2265                 free(cluster);
2266                 return -ENOMEM;
2267         }
2268
2269         if (mdres->compress_method == COMPRESS_ZLIB) {
2270                 tmp = malloc(max_size);
2271                 if (!tmp) {
2272                         error("not enough memory for buffer");
2273                         free(cluster);
2274                         free(buffer);
2275                         return -ENOMEM;
2276                 }
2277         }
2278
2279         bytenr = current_cluster;
2280         while (1) {
2281                 if (fseek(mdres->in, current_cluster, SEEK_SET)) {
2282                         error("seek failed: %s", strerror(errno));
2283                         ret = -EIO;
2284                         break;
2285                 }
2286
2287                 ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
2288                 if (ret == 0) {
2289                         if (cluster_bytenr != 0) {
2290                                 cluster_bytenr = 0;
2291                                 current_cluster = 0;
2292                                 bytenr = 0;
2293                                 continue;
2294                         }
2295                         error(
2296         "unknown state after reading cluster at %llu, probably corrupted data",
2297                                         cluster_bytenr);
2298                         ret = -EIO;
2299                         break;
2300                 } else if (ret < 0) {
2301                         error("unable to read image at %llu: %s",
2302                                         (unsigned long long)cluster_bytenr,
2303                                         strerror(errno));
2304                         break;
2305                 }
2306                 ret = 0;
2307
2308                 header = &cluster->header;
2309                 if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
2310                     le64_to_cpu(header->bytenr) != current_cluster) {
2311                         error("bad header in metadump image");
2312                         ret = -EIO;
2313                         break;
2314                 }
2315
2316                 bytenr += BLOCK_SIZE;
2317                 nritems = le32_to_cpu(header->nritems);
2318                 for (i = 0; i < nritems; i++) {
2319                         size_t size;
2320
2321                         item = &cluster->items[i];
2322                         bufsize = le32_to_cpu(item->size);
2323                         item_bytenr = le64_to_cpu(item->bytenr);
2324
2325                         if (bufsize > max_size) {
2326                                 error("item %u too big: %u > %u", i, bufsize,
2327                                                 max_size);
2328                                 ret = -EIO;
2329                                 break;
2330                         }
2331
2332                         if (mdres->compress_method == COMPRESS_ZLIB) {
2333                                 ret = fread(tmp, bufsize, 1, mdres->in);
2334                                 if (ret != 1) {
2335                                         error("read error: %s", strerror(errno));
2336                                         ret = -EIO;
2337                                         break;
2338                                 }
2339
2340                                 size = max_size;
2341                                 ret = uncompress(buffer,
2342                                                  (unsigned long *)&size, tmp,
2343                                                  bufsize);
2344                                 if (ret != Z_OK) {
2345                                         error("decompression failed with %d",
2346                                                         ret);
2347                                         ret = -EIO;
2348                                         break;
2349                                 }
2350                         } else {
2351                                 ret = fread(buffer, bufsize, 1, mdres->in);
2352                                 if (ret != 1) {
2353                                         error("read error: %s",
2354                                                         strerror(errno));
2355                                         ret = -EIO;
2356                                         break;
2357                                 }
2358                                 size = bufsize;
2359                         }
2360                         ret = 0;
2361
2362                         if (item_bytenr <= search &&
2363                             item_bytenr + size > search) {
2364                                 ret = read_chunk_block(mdres, buffer, search,
2365                                                        item_bytenr, size,
2366                                                        current_cluster);
2367                                 if (!ret)
2368                                         ret = 1;
2369                                 break;
2370                         }
2371                         bytenr += bufsize;
2372                 }
2373                 if (ret) {
2374                         if (ret > 0)
2375                                 ret = 0;
2376                         break;
2377                 }
2378                 if (bytenr & BLOCK_MASK)
2379                         bytenr += BLOCK_SIZE - (bytenr & BLOCK_MASK);
2380                 current_cluster = bytenr;
2381         }
2382
2383         free(tmp);
2384         free(buffer);
2385         free(cluster);
2386         return ret;
2387 }
2388
2389 static int build_chunk_tree(struct mdrestore_struct *mdres,
2390                             struct meta_cluster *cluster)
2391 {
2392         struct btrfs_super_block *super;
2393         struct meta_cluster_header *header;
2394         struct meta_cluster_item *item = NULL;
2395         u64 chunk_root_bytenr = 0;
2396         u32 i, nritems;
2397         u64 bytenr = 0;
2398         u8 *buffer;
2399         int ret;
2400
2401         /* We can't seek with stdin so don't bother doing this */
2402         if (mdres->in == stdin)
2403                 return 0;
2404
2405         ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
2406         if (ret <= 0) {
2407                 error("unable to read cluster: %s", strerror(errno));
2408                 return -EIO;
2409         }
2410         ret = 0;
2411
2412         header = &cluster->header;
2413         if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
2414             le64_to_cpu(header->bytenr) != 0) {
2415                 error("bad header in metadump image");
2416                 return -EIO;
2417         }
2418
2419         bytenr += BLOCK_SIZE;
2420         mdres->compress_method = header->compress;
2421         nritems = le32_to_cpu(header->nritems);
2422         for (i = 0; i < nritems; i++) {
2423                 item = &cluster->items[i];
2424
2425                 if (le64_to_cpu(item->bytenr) == BTRFS_SUPER_INFO_OFFSET)
2426                         break;
2427                 bytenr += le32_to_cpu(item->size);
2428                 if (fseek(mdres->in, le32_to_cpu(item->size), SEEK_CUR)) {
2429                         error("seek failed: %s", strerror(errno));
2430                         return -EIO;
2431                 }
2432         }
2433
2434         if (!item || le64_to_cpu(item->bytenr) != BTRFS_SUPER_INFO_OFFSET) {
2435                 error("did not find superblock at %llu",
2436                                 le64_to_cpu(item->bytenr));
2437                 return -EINVAL;
2438         }
2439
2440         buffer = malloc(le32_to_cpu(item->size));
2441         if (!buffer) {
2442                 error("not enough memory to allocate buffer");
2443                 return -ENOMEM;
2444         }
2445
2446         ret = fread(buffer, le32_to_cpu(item->size), 1, mdres->in);
2447         if (ret != 1) {
2448                 error("unable to read buffer: %s", strerror(errno));
2449                 free(buffer);
2450                 return -EIO;
2451         }
2452
2453         if (mdres->compress_method == COMPRESS_ZLIB) {
2454                 size_t size = MAX_PENDING_SIZE * 2;
2455                 u8 *tmp;
2456
2457                 tmp = malloc(MAX_PENDING_SIZE * 2);
2458                 if (!tmp) {
2459                         free(buffer);
2460                         return -ENOMEM;
2461                 }
2462                 ret = uncompress(tmp, (unsigned long *)&size,
2463                                  buffer, le32_to_cpu(item->size));
2464                 if (ret != Z_OK) {
2465                         error("decompression failed with %d", ret);
2466                         free(buffer);
2467                         free(tmp);
2468                         return -EIO;
2469                 }
2470                 free(buffer);
2471                 buffer = tmp;
2472         }
2473
2474         pthread_mutex_lock(&mdres->mutex);
2475         super = (struct btrfs_super_block *)buffer;
2476         chunk_root_bytenr = btrfs_super_chunk_root(super);
2477         mdres->nodesize = btrfs_super_nodesize(super);
2478         memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
2479         memcpy(mdres->uuid, super->dev_item.uuid,
2480                        BTRFS_UUID_SIZE);
2481         mdres->devid = le64_to_cpu(super->dev_item.devid);
2482         free(buffer);
2483         pthread_mutex_unlock(&mdres->mutex);
2484
2485         return search_for_chunk_blocks(mdres, chunk_root_bytenr, 0);
2486 }
2487
2488 static int range_contains_super(u64 physical, u64 bytes)
2489 {
2490         u64 super_bytenr;
2491         int i;
2492
2493         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2494                 super_bytenr = btrfs_sb_offset(i);
2495                 if (super_bytenr >= physical &&
2496                     super_bytenr < physical + bytes)
2497                         return 1;
2498         }
2499
2500         return 0;
2501 }
2502
2503 static void remap_overlapping_chunks(struct mdrestore_struct *mdres)
2504 {
2505         struct fs_chunk *fs_chunk;
2506
2507         while (!list_empty(&mdres->overlapping_chunks)) {
2508                 fs_chunk = list_first_entry(&mdres->overlapping_chunks,
2509                                             struct fs_chunk, list);
2510                 list_del_init(&fs_chunk->list);
2511                 if (range_contains_super(fs_chunk->physical,
2512                                          fs_chunk->bytes)) {
2513                         warning(
2514 "remapping a chunk that had a super mirror inside of it, clearing space cache so we don't end up with corruption");
2515                         mdres->clear_space_cache = 1;
2516                 }
2517                 fs_chunk->physical = mdres->last_physical_offset;
2518                 tree_insert(&mdres->physical_tree, &fs_chunk->p, physical_cmp);
2519                 mdres->last_physical_offset += fs_chunk->bytes;
2520         }
2521 }
2522
2523 static int fixup_devices(struct btrfs_fs_info *fs_info,
2524                          struct mdrestore_struct *mdres, off_t dev_size)
2525 {
2526         struct btrfs_trans_handle *trans;
2527         struct btrfs_dev_item *dev_item;
2528         struct btrfs_path path;
2529         struct extent_buffer *leaf;
2530         struct btrfs_root *root = fs_info->chunk_root;
2531         struct btrfs_key key;
2532         u64 devid, cur_devid;
2533         int ret;
2534
2535         trans = btrfs_start_transaction(fs_info->tree_root, 1);
2536         if (IS_ERR(trans)) {
2537                 error("cannot starting transaction %ld", PTR_ERR(trans));
2538                 return PTR_ERR(trans);
2539         }
2540
2541         dev_item = &fs_info->super_copy->dev_item;
2542
2543         devid = btrfs_stack_device_id(dev_item);
2544
2545         btrfs_set_stack_device_total_bytes(dev_item, dev_size);
2546         btrfs_set_stack_device_bytes_used(dev_item, mdres->alloced_chunks);
2547
2548         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2549         key.type = BTRFS_DEV_ITEM_KEY;
2550         key.offset = 0;
2551
2552         btrfs_init_path(&path);
2553
2554 again:
2555         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
2556         if (ret < 0) {
2557                 error("search failed: %d", ret);
2558                 exit(1);
2559         }
2560
2561         while (1) {
2562                 leaf = path.nodes[0];
2563                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2564                         ret = btrfs_next_leaf(root, &path);
2565                         if (ret < 0) {
2566                                 error("cannot go to next leaf %d", ret);
2567                                 exit(1);
2568                         }
2569                         if (ret > 0) {
2570                                 ret = 0;
2571                                 break;
2572                         }
2573                         leaf = path.nodes[0];
2574                 }
2575
2576                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2577                 if (key.type > BTRFS_DEV_ITEM_KEY)
2578                         break;
2579                 if (key.type != BTRFS_DEV_ITEM_KEY) {
2580                         path.slots[0]++;
2581                         continue;
2582                 }
2583
2584                 dev_item = btrfs_item_ptr(leaf, path.slots[0],
2585                                           struct btrfs_dev_item);
2586                 cur_devid = btrfs_device_id(leaf, dev_item);
2587                 if (devid != cur_devid) {
2588                         ret = btrfs_del_item(trans, root, &path);
2589                         if (ret) {
2590                                 error("cannot delete item: %d", ret);
2591                                 exit(1);
2592                         }
2593                         btrfs_release_path(&path);
2594                         goto again;
2595                 }
2596
2597                 btrfs_set_device_total_bytes(leaf, dev_item, dev_size);
2598                 btrfs_set_device_bytes_used(leaf, dev_item,
2599                                             mdres->alloced_chunks);
2600                 btrfs_mark_buffer_dirty(leaf);
2601                 path.slots[0]++;
2602         }
2603
2604         btrfs_release_path(&path);
2605         ret = btrfs_commit_transaction(trans, fs_info->tree_root);
2606         if (ret) {
2607                 error("unable to commit transaction: %d", ret);
2608                 return ret;
2609         }
2610         return 0;
2611 }
2612
2613 static int restore_metadump(const char *input, FILE *out, int old_restore,
2614                             int num_threads, int fixup_offset,
2615                             const char *target, int multi_devices)
2616 {
2617         struct meta_cluster *cluster = NULL;
2618         struct meta_cluster_header *header;
2619         struct mdrestore_struct mdrestore;
2620         struct btrfs_fs_info *info = NULL;
2621         u64 bytenr = 0;
2622         FILE *in = NULL;
2623         int ret = 0;
2624
2625         if (!strcmp(input, "-")) {
2626                 in = stdin;
2627         } else {
2628                 in = fopen(input, "r");
2629                 if (!in) {
2630                         error("unable to open metadump image: %s",
2631                                         strerror(errno));
2632                         return 1;
2633                 }
2634         }
2635
2636         /* NOTE: open with write mode */
2637         if (fixup_offset) {
2638                 info = open_ctree_fs_info(target, 0, 0, 0,
2639                                           OPEN_CTREE_WRITES |
2640                                           OPEN_CTREE_RESTORE |
2641                                           OPEN_CTREE_PARTIAL);
2642                 if (!info) {
2643                         error("open ctree failed");
2644                         ret = -EIO;
2645                         goto failed_open;
2646                 }
2647         }
2648
2649         cluster = malloc(BLOCK_SIZE);
2650         if (!cluster) {
2651                 error("not enough memory for cluster");
2652                 ret = -ENOMEM;
2653                 goto failed_info;
2654         }
2655
2656         ret = mdrestore_init(&mdrestore, in, out, old_restore, num_threads,
2657                              fixup_offset, info, multi_devices);
2658         if (ret) {
2659                 error("failed to initialize metadata restore state: %d", ret);
2660                 goto failed_cluster;
2661         }
2662
2663         if (!multi_devices && !old_restore) {
2664                 ret = build_chunk_tree(&mdrestore, cluster);
2665                 if (ret)
2666                         goto out;
2667                 if (!list_empty(&mdrestore.overlapping_chunks))
2668                         remap_overlapping_chunks(&mdrestore);
2669         }
2670
2671         if (in != stdin && fseek(in, 0, SEEK_SET)) {
2672                 error("seek failed: %s", strerror(errno));
2673                 goto out;
2674         }
2675
2676         while (!mdrestore.error) {
2677                 ret = fread(cluster, BLOCK_SIZE, 1, in);
2678                 if (!ret)
2679                         break;
2680
2681                 header = &cluster->header;
2682                 if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
2683                     le64_to_cpu(header->bytenr) != bytenr) {
2684                         error("bad header in metadump image");
2685                         ret = -EIO;
2686                         break;
2687                 }
2688                 ret = add_cluster(cluster, &mdrestore, &bytenr);
2689                 if (ret) {
2690                         error("failed to add cluster: %d", ret);
2691                         break;
2692                 }
2693         }
2694         ret = wait_for_worker(&mdrestore);
2695
2696         if (!ret && !multi_devices && !old_restore) {
2697                 struct btrfs_root *root;
2698                 struct stat st;
2699
2700                 root = open_ctree_fd(fileno(out), target, 0,
2701                                           OPEN_CTREE_PARTIAL |
2702                                           OPEN_CTREE_WRITES |
2703                                           OPEN_CTREE_NO_DEVICES);
2704                 if (!root) {
2705                         error("open ctree failed in %s", target);
2706                         ret = -EIO;
2707                         goto out;
2708                 }
2709                 info = root->fs_info;
2710
2711                 if (stat(target, &st)) {
2712                         error("stat %s failed: %s", target, strerror(errno));
2713                         close_ctree(info->chunk_root);
2714                         free(cluster);
2715                         return 1;
2716                 }
2717
2718                 ret = fixup_devices(info, &mdrestore, st.st_size);
2719                 close_ctree(info->chunk_root);
2720                 if (ret)
2721                         goto out;
2722         }
2723 out:
2724         mdrestore_destroy(&mdrestore, num_threads);
2725 failed_cluster:
2726         free(cluster);
2727 failed_info:
2728         if (fixup_offset && info)
2729                 close_ctree(info->chunk_root);
2730 failed_open:
2731         if (in != stdin)
2732                 fclose(in);
2733         return ret;
2734 }
2735
2736 static int update_disk_super_on_device(struct btrfs_fs_info *info,
2737                                        const char *other_dev, u64 cur_devid)
2738 {
2739         struct btrfs_key key;
2740         struct extent_buffer *leaf;
2741         struct btrfs_path path;
2742         struct btrfs_dev_item *dev_item;
2743         struct btrfs_super_block *disk_super;
2744         char dev_uuid[BTRFS_UUID_SIZE];
2745         char fs_uuid[BTRFS_UUID_SIZE];
2746         u64 devid, type, io_align, io_width;
2747         u64 sector_size, total_bytes, bytes_used;
2748         char buf[BTRFS_SUPER_INFO_SIZE];
2749         int fp = -1;
2750         int ret;
2751
2752         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2753         key.type = BTRFS_DEV_ITEM_KEY;
2754         key.offset = cur_devid;
2755
2756         btrfs_init_path(&path);
2757         ret = btrfs_search_slot(NULL, info->chunk_root, &key, &path, 0, 0); 
2758         if (ret) {
2759                 error("search key failed: %d", ret);
2760                 ret = -EIO;
2761                 goto out;
2762         }
2763
2764         leaf = path.nodes[0];
2765         dev_item = btrfs_item_ptr(leaf, path.slots[0],
2766                                   struct btrfs_dev_item);
2767
2768         devid = btrfs_device_id(leaf, dev_item);
2769         if (devid != cur_devid) {
2770                 error("devid mismatch: %llu != %llu",
2771                                 (unsigned long long)devid,
2772                                 (unsigned long long)cur_devid);
2773                 ret = -EIO;
2774                 goto out;
2775         }
2776
2777         type = btrfs_device_type(leaf, dev_item);
2778         io_align = btrfs_device_io_align(leaf, dev_item);
2779         io_width = btrfs_device_io_width(leaf, dev_item);
2780         sector_size = btrfs_device_sector_size(leaf, dev_item);
2781         total_bytes = btrfs_device_total_bytes(leaf, dev_item);
2782         bytes_used = btrfs_device_bytes_used(leaf, dev_item);
2783         read_extent_buffer(leaf, dev_uuid, (unsigned long)btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE);
2784         read_extent_buffer(leaf, fs_uuid, (unsigned long)btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE);
2785
2786         btrfs_release_path(&path);
2787
2788         printf("update disk super on %s devid=%llu\n", other_dev, devid);
2789
2790         /* update other devices' super block */
2791         fp = open(other_dev, O_CREAT | O_RDWR, 0600);
2792         if (fp < 0) {
2793                 error("could not open %s: %s", other_dev, strerror(errno));
2794                 ret = -EIO;
2795                 goto out;
2796         }
2797
2798         memcpy(buf, info->super_copy, BTRFS_SUPER_INFO_SIZE);
2799
2800         disk_super = (struct btrfs_super_block *)buf;
2801         dev_item = &disk_super->dev_item;
2802
2803         btrfs_set_stack_device_type(dev_item, type);
2804         btrfs_set_stack_device_id(dev_item, devid);
2805         btrfs_set_stack_device_total_bytes(dev_item, total_bytes);
2806         btrfs_set_stack_device_bytes_used(dev_item, bytes_used);
2807         btrfs_set_stack_device_io_align(dev_item, io_align);
2808         btrfs_set_stack_device_io_width(dev_item, io_width);
2809         btrfs_set_stack_device_sector_size(dev_item, sector_size);
2810         memcpy(dev_item->uuid, dev_uuid, BTRFS_UUID_SIZE);
2811         memcpy(dev_item->fsid, fs_uuid, BTRFS_UUID_SIZE);
2812         csum_block((u8 *)buf, BTRFS_SUPER_INFO_SIZE);
2813
2814         ret = pwrite64(fp, buf, BTRFS_SUPER_INFO_SIZE, BTRFS_SUPER_INFO_OFFSET);
2815         if (ret != BTRFS_SUPER_INFO_SIZE) {
2816                 if (ret < 0)
2817                         error("cannot write superblock: %s", strerror(ret));
2818                 else
2819                         error("cannot write superblock");
2820                 ret = -EIO;
2821                 goto out;
2822         }
2823
2824         write_backup_supers(fp, (u8 *)buf);
2825
2826 out:
2827         if (fp != -1)
2828                 close(fp);
2829         return ret;
2830 }
2831
2832 static void print_usage(int ret)
2833 {
2834         printf("usage: btrfs-image [options] source target\n");
2835         printf("\t-r      \trestore metadump image\n");
2836         printf("\t-c value\tcompression level (0 ~ 9)\n");
2837         printf("\t-t value\tnumber of threads (1 ~ 32)\n");
2838         printf("\t-o      \tdon't mess with the chunk tree when restoring\n");
2839         printf("\t-s      \tsanitize file names, use once to just use garbage, use twice if you want crc collisions\n");
2840         printf("\t-w      \twalk all trees instead of using extent tree, do this if your extent tree is broken\n");
2841         printf("\t-m       \trestore for multiple devices\n");
2842         printf("\n");
2843         printf("\tIn the dump mode, source is the btrfs device and target is the output file (use '-' for stdout).\n");
2844         printf("\tIn the restore mode, source is the dumped image and target is the btrfs device/file.\n");
2845         exit(ret);
2846 }
2847
2848 int main(int argc, char *argv[])
2849 {
2850         char *source;
2851         char *target;
2852         u64 num_threads = 0;
2853         u64 compress_level = 0;
2854         int create = 1;
2855         int old_restore = 0;
2856         int walk_trees = 0;
2857         int multi_devices = 0;
2858         int ret;
2859         int sanitize = 0;
2860         int dev_cnt = 0;
2861         int usage_error = 0;
2862         FILE *out;
2863
2864         while (1) {
2865                 static const struct option long_options[] = {
2866                         { "help", no_argument, NULL, GETOPT_VAL_HELP},
2867                         { NULL, 0, NULL, 0 }
2868                 };
2869                 int c = getopt_long(argc, argv, "rc:t:oswm", long_options, NULL);
2870                 if (c < 0)
2871                         break;
2872                 switch (c) {
2873                 case 'r':
2874                         create = 0;
2875                         break;
2876                 case 't':
2877                         num_threads = arg_strtou64(optarg);
2878                         if (num_threads > MAX_WORKER_THREADS) {
2879                                 error("number of threads out of range: %llu > %d",
2880                                         (unsigned long long)num_threads,
2881                                         MAX_WORKER_THREADS);
2882                                 return 1;
2883                         }
2884                         break;
2885                 case 'c':
2886                         compress_level = arg_strtou64(optarg);
2887                         if (compress_level > 9) {
2888                                 error("compression level out of range: %llu",
2889                                         (unsigned long long)compress_level);
2890                                 return 1;
2891                         }
2892                         break;
2893                 case 'o':
2894                         old_restore = 1;
2895                         break;
2896                 case 's':
2897                         sanitize++;
2898                         break;
2899                 case 'w':
2900                         walk_trees = 1;
2901                         break;
2902                 case 'm':
2903                         create = 0;
2904                         multi_devices = 1;
2905                         break;
2906                         case GETOPT_VAL_HELP:
2907                 default:
2908                         print_usage(c != GETOPT_VAL_HELP);
2909                 }
2910         }
2911
2912         set_argv0(argv);
2913         if (check_argc_min(argc - optind, 2))
2914                 print_usage(1);
2915
2916         dev_cnt = argc - optind - 1;
2917
2918         if (create) {
2919                 if (old_restore) {
2920                         error(
2921                         "create and restore cannot be used at the same time");
2922                         usage_error++;
2923                 }
2924         } else {
2925                 if (walk_trees || sanitize || compress_level) {
2926                         error(
2927                         "useing -w, -s, -c options for restore makes no sense");
2928                         usage_error++;
2929                 }
2930                 if (multi_devices && dev_cnt < 2) {
2931                         error("not enough devices specified for -m option");
2932                         usage_error++;
2933                 }
2934                 if (!multi_devices && dev_cnt != 1) {
2935                         error("accepts only 1 device without -m option");
2936                         usage_error++;
2937                 }
2938         }
2939
2940         if (usage_error)
2941                 print_usage(1);
2942
2943         source = argv[optind];
2944         target = argv[optind + 1];
2945
2946         if (create && !strcmp(target, "-")) {
2947                 out = stdout;
2948         } else {
2949                 out = fopen(target, "w+");
2950                 if (!out) {
2951                         error("unable to create target file %s", target);
2952                         exit(1);
2953                 }
2954         }
2955
2956         if (compress_level > 0 || create == 0) {
2957                 if (num_threads == 0) {
2958                         long tmp = sysconf(_SC_NPROCESSORS_ONLN);
2959
2960                         if (tmp <= 0)
2961                                 tmp = 1;
2962                         num_threads = tmp;
2963                 }
2964         } else {
2965                 num_threads = 0;
2966         }
2967
2968         if (create) {
2969                 ret = check_mounted(source);
2970                 if (ret < 0) {
2971                         warning("unable to check mount status of: %s",
2972                                         strerror(-ret));
2973                 } else if (ret) {
2974                         warning("%s already mounted, results may be inaccurate",
2975                                         source);
2976                 }
2977
2978                 ret = create_metadump(source, out, num_threads,
2979                                       compress_level, sanitize, walk_trees);
2980         } else {
2981                 ret = restore_metadump(source, out, old_restore, num_threads,
2982                                        0, target, multi_devices);
2983         }
2984         if (ret) {
2985                 error("%s failed: %s", (create) ? "create" : "restore",
2986                        strerror(errno));
2987                 goto out;
2988         }
2989
2990          /* extended support for multiple devices */
2991         if (!create && multi_devices) {
2992                 struct btrfs_fs_info *info;
2993                 u64 total_devs;
2994                 int i;
2995
2996                 info = open_ctree_fs_info(target, 0, 0, 0,
2997                                           OPEN_CTREE_PARTIAL |
2998                                           OPEN_CTREE_RESTORE);
2999                 if (!info) {
3000                         error("open ctree failed at %s", target);
3001                         return 1;
3002                 }
3003
3004                 total_devs = btrfs_super_num_devices(info->super_copy);
3005                 if (total_devs != dev_cnt) {
3006                         error("it needs %llu devices but has only %d",
3007                                 total_devs, dev_cnt);
3008                         close_ctree(info->chunk_root);
3009                         goto out;
3010                 }
3011
3012                 /* update super block on other disks */
3013                 for (i = 2; i <= dev_cnt; i++) {
3014                         ret = update_disk_super_on_device(info,
3015                                         argv[optind + i], (u64)i);
3016                         if (ret) {
3017                                 error("update disk superblock failed devid %d: %d",
3018                                         i, ret);
3019                                 close_ctree(info->chunk_root);
3020                                 exit(1);
3021                         }
3022                 }
3023
3024                 close_ctree(info->chunk_root);
3025
3026                 /* fix metadata block to map correct chunk */
3027                 ret = restore_metadump(source, out, 0, num_threads, 1,
3028                                        target, 1);
3029                 if (ret) {
3030                         error("unable to fixup metadump: %d", ret);
3031                         exit(1);
3032                 }
3033         }
3034 out:
3035         if (out == stdout) {
3036                 fflush(out);
3037         } else {
3038                 fclose(out);
3039                 if (ret && create) {
3040                         int unlink_ret;
3041
3042                         unlink_ret = unlink(target);
3043                         if (unlink_ret)
3044                                 error("unlink output file %s failed: %s",
3045                                                 target, strerror(errno));
3046                 }
3047         }
3048
3049         btrfs_close_all_devices();
3050
3051         return !!ret;
3052 }