btrfs-progs: image: drop unused parameter from sanitize_xattr
[platform/upstream/btrfs-progs.git] / image / main.c
1 /*
2  * Copyright (C) 2008 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <pthread.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <fcntl.h>
25 #include <unistd.h>
26 #include <dirent.h>
27 #include <zlib.h>
28 #include <getopt.h>
29
30 #include "kerncompat.h"
31 #include "crc32c.h"
32 #include "ctree.h"
33 #include "disk-io.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "volumes.h"
37 #include "extent_io.h"
38 #include "help.h"
39 #include "image/metadump.h"
40 #include "image/sanitize.h"
41
42 #define MAX_WORKER_THREADS      (32)
43
/*
 * One queued unit of dump output: a contiguous range of copied blocks,
 * optionally compressed by a worker before being written out in order.
 */
struct async_work {
        struct list_head list;          /* membership in md->list (work queue) */
        struct list_head ordered;       /* membership in md->ordered (write order) */
        u64 start;                      /* logical start of the copied range */
        u64 size;                       /* uncompressed length of the range */
        u8 *buffer;                     /* payload; replaced by compressed copy */
        size_t bufsize;                 /* current byte size of @buffer */
        int error;                      /* set when compression failed */
};
53
/*
 * State for one metadata dump: source filesystem, output stream, the
 * compression worker pool and the cluster currently being assembled.
 */
struct metadump_struct {
        struct btrfs_root *root;        /* filesystem being dumped */
        FILE *out;                      /* image output stream */

        union {
                /* cluster header + item index, padded to one block */
                struct meta_cluster cluster;
                char meta_cluster_bytes[BLOCK_SIZE];
        };

        pthread_t threads[MAX_WORKER_THREADS];
        size_t num_threads;
        pthread_mutex_t mutex;          /* guards the shared queue state below */
        pthread_cond_t cond;            /* wakes workers on new work or done */
        struct rb_root name_tree;       /* cache of sanitized names (struct name) */

        struct list_head list;          /* work queued for the workers */
        struct list_head ordered;       /* all work, in writeout order */
        size_t num_items;               /* items submitted to the current cluster */
        size_t num_ready;               /* items the workers have finished */

        u64 pending_start;              /* start of range not yet queued; (u64)-1 if none */
        u64 pending_size;

        int compress_level;             /* zlib level; 0 disables compression */
        int done;                       /* tells workers to exit */
        int data;                       /* presumably: also dump data blocks — TODO confirm */
        enum sanitize_mode sanitize_names;

        int error;                      /* first error reported by any worker */
};
84
/*
 * State for restoring an image stream onto a target device or file.
 */
struct mdrestore_struct {
        FILE *in;                       /* image being read */
        FILE *out;                      /* restore target */

        pthread_t threads[MAX_WORKER_THREADS];
        size_t num_threads;
        pthread_mutex_t mutex;          /* guards the shared lists/flags */
        pthread_cond_t cond;

        struct rb_root chunk_tree;      /* fs_chunks indexed by logical address */
        struct rb_root physical_tree;   /* fs_chunks indexed by physical address */
        struct list_head list;
        struct list_head overlapping_chunks;
        size_t num_items;
        u32 nodesize;
        u64 devid;
        u64 alloced_chunks;
        u64 last_physical_offset;
        u8 uuid[BTRFS_UUID_SIZE];
        u8 fsid[BTRFS_FSID_SIZE];

        int compress_method;            /* COMPRESS_* taken from the image header */
        int done;
        int error;
        int old_restore;                /* NOTE(review): presumably old image format — confirm */
        int fixup_offset;
        int multi_devices;
        int clear_space_cache;
        struct btrfs_fs_info *info;
};
115
116 static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
117                                    u64 search, u64 cluster_bytenr);
118 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size);
119
120 static void csum_block(u8 *buf, size_t len)
121 {
122         u8 result[BTRFS_CRC32_SIZE];
123         u32 crc = ~(u32)0;
124         crc = crc32c(crc, buf + BTRFS_CSUM_SIZE, len - BTRFS_CSUM_SIZE);
125         btrfs_csum_final(crc, result);
126         memcpy(buf, result, BTRFS_CRC32_SIZE);
127 }
128
129 static int has_name(struct btrfs_key *key)
130 {
131         switch (key->type) {
132         case BTRFS_DIR_ITEM_KEY:
133         case BTRFS_DIR_INDEX_KEY:
134         case BTRFS_INODE_REF_KEY:
135         case BTRFS_INODE_EXTREF_KEY:
136         case BTRFS_XATTR_ITEM_KEY:
137                 return 1;
138         default:
139                 break;
140         }
141
142         return 0;
143 }
144
145 static char *generate_garbage(u32 name_len)
146 {
147         char *buf = malloc(name_len);
148         int i;
149
150         if (!buf)
151                 return NULL;
152
153         for (i = 0; i < name_len; i++) {
154                 char c = rand_range(94) + 33;
155
156                 if (c == '/')
157                         c++;
158                 buf[i] = c;
159         }
160
161         return buf;
162 }
163
164 static int name_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
165 {
166         struct name *entry = rb_entry(a, struct name, n);
167         struct name *ins = rb_entry(b, struct name, n);
168         u32 len;
169
170         len = min(ins->len, entry->len);
171         return memcmp(ins->val, entry->val, len);
172 }
173
174 static int chunk_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
175 {
176         struct fs_chunk *entry = rb_entry(a, struct fs_chunk, l);
177         struct fs_chunk *ins = rb_entry(b, struct fs_chunk, l);
178
179         if (fuzz && ins->logical >= entry->logical &&
180             ins->logical < entry->logical + entry->bytes)
181                 return 0;
182
183         if (ins->logical < entry->logical)
184                 return -1;
185         else if (ins->logical > entry->logical)
186                 return 1;
187         return 0;
188 }
189
190 static int physical_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
191 {
192         struct fs_chunk *entry = rb_entry(a, struct fs_chunk, p);
193         struct fs_chunk *ins = rb_entry(b, struct fs_chunk, p);
194
195         if (fuzz && ins->physical >= entry->physical &&
196             ins->physical < entry->physical + entry->bytes)
197                 return 0;
198
199         if (fuzz && entry->physical >= ins->physical &&
200             entry->physical < ins->physical + ins->bytes)
201                 return 0;
202
203         if (ins->physical < entry->physical)
204                 return -1;
205         else if (ins->physical > entry->physical)
206                 return 1;
207         return 0;
208 }
209
210 static void tree_insert(struct rb_root *root, struct rb_node *ins,
211                         int (*cmp)(struct rb_node *a, struct rb_node *b,
212                                    int fuzz))
213 {
214         struct rb_node ** p = &root->rb_node;
215         struct rb_node * parent = NULL;
216         int dir;
217
218         while(*p) {
219                 parent = *p;
220
221                 dir = cmp(*p, ins, 1);
222                 if (dir < 0)
223                         p = &(*p)->rb_left;
224                 else if (dir > 0)
225                         p = &(*p)->rb_right;
226                 else
227                         BUG();
228         }
229
230         rb_link_node(ins, parent, p);
231         rb_insert_color(ins, root);
232 }
233
234 static struct rb_node *tree_search(struct rb_root *root,
235                                    struct rb_node *search,
236                                    int (*cmp)(struct rb_node *a,
237                                               struct rb_node *b, int fuzz),
238                                    int fuzz)
239 {
240         struct rb_node *n = root->rb_node;
241         int dir;
242
243         while (n) {
244                 dir = cmp(n, search, fuzz);
245                 if (dir < 0)
246                         n = n->rb_left;
247                 else if (dir > 0)
248                         n = n->rb_right;
249                 else
250                         return n;
251         }
252
253         return NULL;
254 }
255
256 static u64 logical_to_physical(struct mdrestore_struct *mdres, u64 logical,
257                                u64 *size, u64 *physical_dup)
258 {
259         struct fs_chunk *fs_chunk;
260         struct rb_node *entry;
261         struct fs_chunk search;
262         u64 offset;
263
264         if (logical == BTRFS_SUPER_INFO_OFFSET)
265                 return logical;
266
267         search.logical = logical;
268         entry = tree_search(&mdres->chunk_tree, &search.l, chunk_cmp, 1);
269         if (!entry) {
270                 if (mdres->in != stdin)
271                         warning("cannot find a chunk, using logical");
272                 return logical;
273         }
274         fs_chunk = rb_entry(entry, struct fs_chunk, l);
275         if (fs_chunk->logical > logical || fs_chunk->logical + fs_chunk->bytes < logical)
276                 BUG();
277         offset = search.logical - fs_chunk->logical;
278
279         if (physical_dup) {
280                 /* Only in dup case, physical_dup is not equal to 0 */
281                 if (fs_chunk->physical_dup)
282                         *physical_dup = fs_chunk->physical_dup + offset;
283                 else
284                         *physical_dup = 0;
285         }
286
287         *size = min(*size, fs_chunk->bytes + fs_chunk->logical - logical);
288         return fs_chunk->physical + offset;
289 }
290
/*
 * Reverse CRC-32C lookup table, used to run the CRC recurrence backwards
 * when computing a 4-byte suffix that forces a desired checksum (see
 * find_collision_calc_suffix()).
 */
static const u32 crc32c_rev_table[256] = {
	0x00000000L,0x05EC76F1L,0x0BD8EDE2L,0x0E349B13L,
	0x17B1DBC4L,0x125DAD35L,0x1C693626L,0x198540D7L,
	0x2F63B788L,0x2A8FC179L,0x24BB5A6AL,0x21572C9BL,
	0x38D26C4CL,0x3D3E1ABDL,0x330A81AEL,0x36E6F75FL,
	0x5EC76F10L,0x5B2B19E1L,0x551F82F2L,0x50F3F403L,
	0x4976B4D4L,0x4C9AC225L,0x42AE5936L,0x47422FC7L,
	0x71A4D898L,0x7448AE69L,0x7A7C357AL,0x7F90438BL,
	0x6615035CL,0x63F975ADL,0x6DCDEEBEL,0x6821984FL,
	0xBD8EDE20L,0xB862A8D1L,0xB65633C2L,0xB3BA4533L,
	0xAA3F05E4L,0xAFD37315L,0xA1E7E806L,0xA40B9EF7L,
	0x92ED69A8L,0x97011F59L,0x9935844AL,0x9CD9F2BBL,
	0x855CB26CL,0x80B0C49DL,0x8E845F8EL,0x8B68297FL,
	0xE349B130L,0xE6A5C7C1L,0xE8915CD2L,0xED7D2A23L,
	0xF4F86AF4L,0xF1141C05L,0xFF208716L,0xFACCF1E7L,
	0xCC2A06B8L,0xC9C67049L,0xC7F2EB5AL,0xC21E9DABL,
	0xDB9BDD7CL,0xDE77AB8DL,0xD043309EL,0xD5AF466FL,
	0x7EF1CAB1L,0x7B1DBC40L,0x75292753L,0x70C551A2L,
	0x69401175L,0x6CAC6784L,0x6298FC97L,0x67748A66L,
	0x51927D39L,0x547E0BC8L,0x5A4A90DBL,0x5FA6E62AL,
	0x4623A6FDL,0x43CFD00CL,0x4DFB4B1FL,0x48173DEEL,
	0x2036A5A1L,0x25DAD350L,0x2BEE4843L,0x2E023EB2L,
	0x37877E65L,0x326B0894L,0x3C5F9387L,0x39B3E576L,
	0x0F551229L,0x0AB964D8L,0x048DFFCBL,0x0161893AL,
	0x18E4C9EDL,0x1D08BF1CL,0x133C240FL,0x16D052FEL,
	0xC37F1491L,0xC6936260L,0xC8A7F973L,0xCD4B8F82L,
	0xD4CECF55L,0xD122B9A4L,0xDF1622B7L,0xDAFA5446L,
	0xEC1CA319L,0xE9F0D5E8L,0xE7C44EFBL,0xE228380AL,
	0xFBAD78DDL,0xFE410E2CL,0xF075953FL,0xF599E3CEL,
	0x9DB87B81L,0x98540D70L,0x96609663L,0x938CE092L,
	0x8A09A045L,0x8FE5D6B4L,0x81D14DA7L,0x843D3B56L,
	0xB2DBCC09L,0xB737BAF8L,0xB90321EBL,0xBCEF571AL,
	0xA56A17CDL,0xA086613CL,0xAEB2FA2FL,0xAB5E8CDEL,
	0xFDE39562L,0xF80FE393L,0xF63B7880L,0xF3D70E71L,
	0xEA524EA6L,0xEFBE3857L,0xE18AA344L,0xE466D5B5L,
	0xD28022EAL,0xD76C541BL,0xD958CF08L,0xDCB4B9F9L,
	0xC531F92EL,0xC0DD8FDFL,0xCEE914CCL,0xCB05623DL,
	0xA324FA72L,0xA6C88C83L,0xA8FC1790L,0xAD106161L,
	0xB49521B6L,0xB1795747L,0xBF4DCC54L,0xBAA1BAA5L,
	0x8C474DFAL,0x89AB3B0BL,0x879FA018L,0x8273D6E9L,
	0x9BF6963EL,0x9E1AE0CFL,0x902E7BDCL,0x95C20D2DL,
	0x406D4B42L,0x45813DB3L,0x4BB5A6A0L,0x4E59D051L,
	0x57DC9086L,0x5230E677L,0x5C047D64L,0x59E80B95L,
	0x6F0EFCCAL,0x6AE28A3BL,0x64D61128L,0x613A67D9L,
	0x78BF270EL,0x7D5351FFL,0x7367CAECL,0x768BBC1DL,
	0x1EAA2452L,0x1B4652A3L,0x1572C9B0L,0x109EBF41L,
	0x091BFF96L,0x0CF78967L,0x02C31274L,0x072F6485L,
	0x31C993DAL,0x3425E52BL,0x3A117E38L,0x3FFD08C9L,
	0x2678481EL,0x23943EEFL,0x2DA0A5FCL,0x284CD30DL,
	0x83125FD3L,0x86FE2922L,0x88CAB231L,0x8D26C4C0L,
	0x94A38417L,0x914FF2E6L,0x9F7B69F5L,0x9A971F04L,
	0xAC71E85BL,0xA99D9EAAL,0xA7A905B9L,0xA2457348L,
	0xBBC0339FL,0xBE2C456EL,0xB018DE7DL,0xB5F4A88CL,
	0xDDD530C3L,0xD8394632L,0xD60DDD21L,0xD3E1ABD0L,
	0xCA64EB07L,0xCF889DF6L,0xC1BC06E5L,0xC4507014L,
	0xF2B6874BL,0xF75AF1BAL,0xF96E6AA9L,0xFC821C58L,
	0xE5075C8FL,0xE0EB2A7EL,0xEEDFB16DL,0xEB33C79CL,
	0x3E9C81F3L,0x3B70F702L,0x35446C11L,0x30A81AE0L,
	0x292D5A37L,0x2CC12CC6L,0x22F5B7D5L,0x2719C124L,
	0x11FF367BL,0x1413408AL,0x1A27DB99L,0x1FCBAD68L,
	0x064EEDBFL,0x03A29B4EL,0x0D96005DL,0x087A76ACL,
	0x605BEEE3L,0x65B79812L,0x6B830301L,0x6E6F75F0L,
	0x77EA3527L,0x720643D6L,0x7C32D8C5L,0x79DEAE34L,
	0x4F38596BL,0x4AD42F9AL,0x44E0B489L,0x410CC278L,
	0x588982AFL,0x5D65F45EL,0x53516F4DL,0x56BD19BCL
};
360
361 /*
362  * Calculate a 4-byte suffix to match desired CRC32C
363  *
364  * @current_crc: CRC32C checksum of all bytes before the suffix
365  * @desired_crc: the checksum that we want to get after adding the suffix
366  *
367  * Outputs: @suffix: pointer to where the suffix will be written (4-bytes)
368  */
369 static void find_collision_calc_suffix(unsigned long current_crc,
370                                        unsigned long desired_crc,
371                                        char *suffix)
372 {
373         int i;
374
375         for(i = 3; i >= 0; i--) {
376                 desired_crc = (desired_crc << 8)
377                             ^ crc32c_rev_table[desired_crc >> 24 & 0xFF]
378                             ^ ((current_crc >> i * 8) & 0xFF);
379         }
380         for (i = 0; i < 4; i++)
381                 suffix[i] = (desired_crc >> i * 8) & 0xFF;
382 }
383
384 /*
385  * Check if suffix is valid according to our file name conventions
386  */
/*
 * Return 1 when all 4 suffix bytes are printable ASCII (space..'~')
 * and none of them is '/', 0 otherwise.
 */
static int find_collision_is_suffix_valid(const char *suffix)
{
	int i;

	for (i = 0; i < 4; i++) {
		unsigned char c = suffix[i];

		if (c < ' ' || c > 126 || c == '/')
			return 0;
	}

	return 1;
}
399
/*
 * Search for a same-length string with the same crc32c as val->val.
 *
 * The candidate is built in val->sub: the first name_len-4 bytes are
 * enumerated odometer-style over printable characters (' '..'~',
 * skipping '/'), and for each prefix the 4-byte suffix that forces the
 * target checksum is computed directly.  Returns 1 when a valid,
 * different collision was found in val->sub, 0 otherwise.
 */
static int find_collision_reverse_crc32c(struct name *val, u32 name_len)
{
	unsigned long checksum;
	unsigned long current_checksum;
	int found = 0;
	int i;

	/* There are no same length collisions of 4 or less bytes */
	if (name_len <= 4)
		return 0;
	checksum = crc32c(~1, val->val, name_len);
	/* only the prefix is enumerated; the last 4 bytes are computed */
	name_len -= 4;
	memset(val->sub, ' ', name_len);
	i = 0;
	while (1) {
		current_checksum = crc32c(~1, val->sub, name_len);
		find_collision_calc_suffix(current_checksum,
					   checksum,
					   val->sub + name_len);
		/* accept only printable suffixes that differ from the original */
		if (find_collision_is_suffix_valid(val->sub + name_len) &&
		    memcmp(val->sub, val->val, val->len)) {
			found = 1;
			break;
		}

		if (val->sub[i] == 126) {
			/* position i wrapped: carry into the next positions */
			do {
				i++;
				if (i >= name_len)
					break;
			} while (val->sub[i] == 126);

			/* all positions exhausted: search failed */
			if (i >= name_len)
				break;
			val->sub[i]++;
			if (val->sub[i] == '/')
				val->sub[i]++;
			/* reset everything below the carry position */
			memset(val->sub, ' ', i);
			i = 0;
			continue;
		} else {
			val->sub[i]++;
			if (val->sub[i] == '/')
				val->sub[i]++;
		}
	}
	return found;
}
448
449 static char *find_collision(struct rb_root *name_tree, char *name,
450                             u32 name_len)
451 {
452         struct name *val;
453         struct rb_node *entry;
454         struct name tmp;
455         int found;
456         int i;
457
458         tmp.val = name;
459         tmp.len = name_len;
460         entry = tree_search(name_tree, &tmp.n, name_cmp, 0);
461         if (entry) {
462                 val = rb_entry(entry, struct name, n);
463                 free(name);
464                 return val->sub;
465         }
466
467         val = malloc(sizeof(struct name));
468         if (!val) {
469                 error("cannot sanitize name, not enough memory");
470                 free(name);
471                 return NULL;
472         }
473
474         memset(val, 0, sizeof(*val));
475
476         val->val = name;
477         val->len = name_len;
478         val->sub = malloc(name_len);
479         if (!val->sub) {
480                 error("cannot sanitize name, not enough memory");
481                 free(val);
482                 free(name);
483                 return NULL;
484         }
485
486         found = find_collision_reverse_crc32c(val, name_len);
487
488         if (!found) {
489                 warning(
490 "cannot find a hash collision for '%.*s', generating garbage, it won't match indexes",
491                         val->len, val->val);
492                 for (i = 0; i < name_len; i++) {
493                         char c = rand_range(94) + 33;
494
495                         if (c == '/')
496                                 c++;
497                         val->sub[i] = c;
498                 }
499         }
500
501         tree_insert(name_tree, &val->n, name_cmp);
502         return val->sub;
503 }
504
/*
 * Overwrite every name stored in a DIR_ITEM/DIR_INDEX item with either a
 * crc32c-colliding replacement (SANITIZE_COLLISIONS) or random printable
 * garbage (SANITIZE_NAMES).  One item may hold several btrfs_dir_item
 * entries back to back; all of them are walked.
 *
 * On allocation failure the remaining names are left unsanitized.
 */
static void sanitize_dir_item(struct metadump_struct *md, struct extent_buffer *eb,
			      int slot)
{
	struct btrfs_dir_item *dir_item;
	char *buf;
	char *garbage;
	unsigned long name_ptr;
	u32 total_len;
	u32 cur = 0;
	u32 this_len;
	u32 name_len;
	/* collision substitutes are cached in md->name_tree and freed later */
	int free_garbage = (md->sanitize_names == SANITIZE_NAMES);

	dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
	total_len = btrfs_item_size_nr(eb, slot);
	while (cur < total_len) {
		/* each sub-entry is header + name + xattr data */
		this_len = sizeof(*dir_item) +
			btrfs_dir_name_len(eb, dir_item) +
			btrfs_dir_data_len(eb, dir_item);
		name_ptr = (unsigned long)(dir_item + 1);
		name_len = btrfs_dir_name_len(eb, dir_item);

		if (md->sanitize_names == SANITIZE_COLLISIONS) {
			buf = malloc(name_len);
			if (!buf) {
				error("cannot sanitize name, not enough memory");
				return;
			}
			read_extent_buffer(eb, buf, name_ptr, name_len);
			/* find_collision() takes ownership of buf */
			garbage = find_collision(&md->name_tree, buf, name_len);
		} else {
			garbage = generate_garbage(name_len);
		}
		if (!garbage) {
			error("cannot sanitize name, not enough memory");
			return;
		}
		write_extent_buffer(eb, garbage, name_ptr, name_len);
		cur += this_len;
		dir_item = (struct btrfs_dir_item *)((char *)dir_item +
						     this_len);
		if (free_garbage)
			free(garbage);
	}
}
550
/*
 * Overwrite every name in an INODE_REF (@ext == 0) or INODE_EXTREF
 * (@ext != 0) item with a crc32c collision (SANITIZE_COLLISIONS) or
 * random garbage (SANITIZE_NAMES).  Several refs can be packed into a
 * single item; all of them are walked.
 *
 * On allocation failure the remaining names are left unsanitized.
 */
static void sanitize_inode_ref(struct metadump_struct *md,
			       struct extent_buffer *eb, int slot, int ext)
{
	struct btrfs_inode_extref *extref;
	struct btrfs_inode_ref *ref;
	char *garbage, *buf;
	unsigned long ptr;
	unsigned long name_ptr;
	u32 item_size;
	u32 cur_offset = 0;
	int len;
	/* collision substitutes are cached in md->name_tree and freed later */
	int free_garbage = (md->sanitize_names == SANITIZE_NAMES);

	item_size = btrfs_item_size_nr(eb, slot);
	ptr = btrfs_item_ptr_offset(eb, slot);
	while (cur_offset < item_size) {
		if (ext) {
			extref = (struct btrfs_inode_extref *)(ptr +
							       cur_offset);
			name_ptr = (unsigned long)(&extref->name);
			len = btrfs_inode_extref_name_len(eb, extref);
			cur_offset += sizeof(*extref);
		} else {
			ref = (struct btrfs_inode_ref *)(ptr + cur_offset);
			len = btrfs_inode_ref_name_len(eb, ref);
			name_ptr = (unsigned long)(ref + 1);
			cur_offset += sizeof(*ref);
		}
		/* the variable-length name follows the fixed header */
		cur_offset += len;

		if (md->sanitize_names == SANITIZE_COLLISIONS) {
			buf = malloc(len);
			if (!buf) {
				error("cannot sanitize name, not enough memory");
				return;
			}
			read_extent_buffer(eb, buf, name_ptr, len);
			/* find_collision() takes ownership of buf */
			garbage = find_collision(&md->name_tree, buf, len);
		} else {
			garbage = generate_garbage(len);
		}

		if (!garbage) {
			error("cannot sanitize name, not enough memory");
			return;
		}
		write_extent_buffer(eb, garbage, name_ptr, len);
		if (free_garbage)
			free(garbage);
	}
}
602
603 static void sanitize_xattr(struct extent_buffer *eb, int slot)
604 {
605         struct btrfs_dir_item *dir_item;
606         unsigned long data_ptr;
607         u32 data_len;
608
609         dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
610         data_len = btrfs_dir_data_len(eb, dir_item);
611
612         data_ptr = (unsigned long)((char *)(dir_item + 1) +
613                                    btrfs_dir_name_len(eb, dir_item));
614         memset_extent_buffer(eb, 0, data_ptr, data_len);
615 }
616
617 static void sanitize_name(struct metadump_struct *md, u8 *dst,
618                           struct extent_buffer *src, struct btrfs_key *key,
619                           int slot)
620 {
621         struct extent_buffer *eb;
622
623         eb = alloc_dummy_eb(src->start, src->len);
624         if (!eb) {
625                 error("cannot sanitize name, not enough memory");
626                 return;
627         }
628
629         memcpy(eb->data, src->data, src->len);
630
631         switch (key->type) {
632         case BTRFS_DIR_ITEM_KEY:
633         case BTRFS_DIR_INDEX_KEY:
634                 sanitize_dir_item(md, eb, slot);
635                 break;
636         case BTRFS_INODE_REF_KEY:
637                 sanitize_inode_ref(md, eb, slot, 0);
638                 break;
639         case BTRFS_INODE_EXTREF_KEY:
640                 sanitize_inode_ref(md, eb, slot, 1);
641                 break;
642         case BTRFS_XATTR_ITEM_KEY:
643                 sanitize_xattr(eb, slot);
644                 break;
645         default:
646                 break;
647         }
648
649         memcpy(dst, eb->data, eb->len);
650         free(eb);
651 }
652
653 /*
654  * zero inline extents and csum items
655  */
/*
 * Blank the information-carrying payloads in a copied leaf: checksum
 * items and inline file extents are zeroed, and names are sanitized
 * when requested.  @dst is the raw image copy of leaf @src; offsets
 * computed from @src are applied to @dst.
 */
static void zero_items(struct metadump_struct *md, u8 *dst,
		       struct extent_buffer *src)
{
	struct btrfs_file_extent_item *fi;
	struct btrfs_item *item;
	struct btrfs_key key;
	u32 nritems = btrfs_header_nritems(src);
	size_t size;
	unsigned long ptr;
	int i, extent_type;

	for (i = 0; i < nritems; i++) {
		item = btrfs_item_nr(i);
		btrfs_item_key_to_cpu(src, &key, i);
		/* checksum items: zero the whole payload */
		if (key.type == BTRFS_CSUM_ITEM_KEY) {
			size = btrfs_item_size_nr(src, i);
			memset(dst + btrfs_leaf_data(src) +
			       btrfs_item_offset_nr(src, i), 0, size);
			continue;
		}

		if (md->sanitize_names && has_name(&key)) {
			sanitize_name(md, dst, src, &key, i);
			continue;
		}

		if (key.type != BTRFS_EXTENT_DATA_KEY)
			continue;

		/* only inline extents keep file data inside the leaf */
		fi = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(src, fi);
		if (extent_type != BTRFS_FILE_EXTENT_INLINE)
			continue;

		ptr = btrfs_file_extent_inline_start(fi);
		size = btrfs_file_extent_inline_item_len(src, item);
		memset(dst + ptr, 0, size);
	}
}
695
/*
 * Copy tree block @src into @dst for the image and blank what the image
 * does not need: unused block space is zeroed, csum/inline payloads
 * (and optionally names) are cleared via zero_items(), and the block
 * checksum is recomputed.  Superblock copies are kept verbatim.
 */
static void copy_buffer(struct metadump_struct *md, u8 *dst,
			struct extent_buffer *src)
{
	int level;
	size_t size;
	u32 nritems;

	memcpy(dst, src->data, src->len);
	if (src->start == BTRFS_SUPER_INFO_OFFSET)
		return;

	level = btrfs_header_level(src);
	nritems = btrfs_header_nritems(src);

	if (nritems == 0) {
		/* empty block: keep only the header */
		size = sizeof(struct btrfs_header);
		memset(dst + size, 0, src->len - size);
	} else if (level == 0) {
		/* leaf: zero the gap between the item array and item data */
		size = btrfs_leaf_data(src) +
			btrfs_item_offset_nr(src, nritems - 1) -
			btrfs_item_nr_offset(nritems);
		memset(dst + btrfs_item_nr_offset(nritems), 0, size);
		zero_items(md, dst, src);
	} else {
		/* node: zero everything past the key/pointer array */
		size = offsetof(struct btrfs_node, ptrs) +
			sizeof(struct btrfs_key_ptr) * nritems;
		memset(dst + size, 0, src->len - size);
	}
	csum_block(dst, src->len);
}
729
730 static void *dump_worker(void *data)
731 {
732         struct metadump_struct *md = (struct metadump_struct *)data;
733         struct async_work *async;
734         int ret;
735
736         while (1) {
737                 pthread_mutex_lock(&md->mutex);
738                 while (list_empty(&md->list)) {
739                         if (md->done) {
740                                 pthread_mutex_unlock(&md->mutex);
741                                 goto out;
742                         }
743                         pthread_cond_wait(&md->cond, &md->mutex);
744                 }
745                 async = list_entry(md->list.next, struct async_work, list);
746                 list_del_init(&async->list);
747                 pthread_mutex_unlock(&md->mutex);
748
749                 if (md->compress_level > 0) {
750                         u8 *orig = async->buffer;
751
752                         async->bufsize = compressBound(async->size);
753                         async->buffer = malloc(async->bufsize);
754                         if (!async->buffer) {
755                                 error("not enough memory for async buffer");
756                                 pthread_mutex_lock(&md->mutex);
757                                 if (!md->error)
758                                         md->error = -ENOMEM;
759                                 pthread_mutex_unlock(&md->mutex);
760                                 pthread_exit(NULL);
761                         }
762
763                         ret = compress2(async->buffer,
764                                          (unsigned long *)&async->bufsize,
765                                          orig, async->size, md->compress_level);
766
767                         if (ret != Z_OK)
768                                 async->error = 1;
769
770                         free(orig);
771                 }
772
773                 pthread_mutex_lock(&md->mutex);
774                 md->num_ready++;
775                 pthread_mutex_unlock(&md->mutex);
776         }
777 out:
778         pthread_exit(NULL);
779 }
780
781 static void meta_cluster_init(struct metadump_struct *md, u64 start)
782 {
783         struct meta_cluster_header *header;
784
785         md->num_items = 0;
786         md->num_ready = 0;
787         header = &md->cluster.header;
788         header->magic = cpu_to_le64(HEADER_MAGIC);
789         header->bytenr = cpu_to_le64(start);
790         header->nritems = cpu_to_le32(0);
791         header->compress = md->compress_level > 0 ?
792                            COMPRESS_ZLIB : COMPRESS_NONE;
793 }
794
795 static void metadump_destroy(struct metadump_struct *md, int num_threads)
796 {
797         int i;
798         struct rb_node *n;
799
800         pthread_mutex_lock(&md->mutex);
801         md->done = 1;
802         pthread_cond_broadcast(&md->cond);
803         pthread_mutex_unlock(&md->mutex);
804
805         for (i = 0; i < num_threads; i++)
806                 pthread_join(md->threads[i], NULL);
807
808         pthread_cond_destroy(&md->cond);
809         pthread_mutex_destroy(&md->mutex);
810
811         while ((n = rb_first(&md->name_tree))) {
812                 struct name *name;
813
814                 name = rb_entry(n, struct name, n);
815                 rb_erase(n, &md->name_tree);
816                 free(name->val);
817                 free(name->sub);
818                 free(name);
819         }
820 }
821
822 static int metadump_init(struct metadump_struct *md, struct btrfs_root *root,
823                          FILE *out, int num_threads, int compress_level,
824                          enum sanitize_mode sanitize_names)
825 {
826         int i, ret = 0;
827
828         memset(md, 0, sizeof(*md));
829         INIT_LIST_HEAD(&md->list);
830         INIT_LIST_HEAD(&md->ordered);
831         md->root = root;
832         md->out = out;
833         md->pending_start = (u64)-1;
834         md->compress_level = compress_level;
835         md->sanitize_names = sanitize_names;
836         if (sanitize_names == SANITIZE_COLLISIONS)
837                 crc32c_optimization_init();
838
839         md->name_tree.rb_node = NULL;
840         md->num_threads = num_threads;
841         pthread_cond_init(&md->cond, NULL);
842         pthread_mutex_init(&md->mutex, NULL);
843         meta_cluster_init(md, 0);
844
845         if (!num_threads)
846                 return 0;
847
848         for (i = 0; i < num_threads; i++) {
849                 ret = pthread_create(md->threads + i, NULL, dump_worker, md);
850                 if (ret)
851                         break;
852         }
853
854         if (ret)
855                 metadump_destroy(md, i + 1);
856
857         return ret;
858 }
859
860 static int write_zero(FILE *out, size_t size)
861 {
862         static char zero[BLOCK_SIZE];
863         return fwrite(zero, size, 1, out);
864 }
865
/*
 * Flush the current cluster to md->out: the index block (header plus one
 * item per queued buffer) followed by all buffers on md->ordered, padded
 * up to the next BLOCK_SIZE boundary.
 *
 * Called with md->mutex held; the mutex is temporarily dropped while
 * waiting for the compression workers to finish the queued items.
 *
 * @next: receives the file offset right behind this cluster, which the
 *        caller uses as the bytenr of the next cluster header.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int write_buffers(struct metadump_struct *md, u64 *next)
{
	struct meta_cluster_header *header = &md->cluster.header;
	struct meta_cluster_item *item;
	struct async_work *async;
	u64 bytenr = 0;
	u32 nritems = 0;
	int ret;
	int err = 0;

	if (list_empty(&md->ordered))
		goto out;

	/* wait until all buffers are compressed */
	while (!err && md->num_items > md->num_ready) {
		struct timespec ts = {
			.tv_sec = 0,
			.tv_nsec = 10000000,
		};
		/* drop the lock so dump_worker can make progress */
		pthread_mutex_unlock(&md->mutex);
		nanosleep(&ts, NULL);
		pthread_mutex_lock(&md->mutex);
		err = md->error;
	}

	if (err) {
		/* md->error holds a negative errno, hence -err */
		error("one of the threads failed: %s", strerror(-err));
		goto out;
	}

	/* setup and write index block */
	list_for_each_entry(async, &md->ordered, ordered) {
		item = &md->cluster.items[nritems];
		item->bytenr = cpu_to_le64(async->start);
		item->size = cpu_to_le32(async->bufsize);
		nritems++;
	}
	header->nritems = cpu_to_le32(nritems);

	/* NOTE(review): on failure the buffers on md->ordered are leaked */
	ret = fwrite(&md->cluster, BLOCK_SIZE, 1, md->out);
	if (ret != 1) {
		error("unable to write out cluster: %s", strerror(errno));
		return -errno;
	}

	/* write buffers */
	bytenr += le64_to_cpu(header->bytenr) + BLOCK_SIZE;
	while (!list_empty(&md->ordered)) {
		async = list_entry(md->ordered.next, struct async_work,
				   ordered);
		list_del_init(&async->ordered);

		bytenr += async->bufsize;
		/*
		 * After the first failure keep draining the list so the
		 * buffers get freed; ret stays 0 from the failed iteration,
		 * so the error branch below re-triggers (errno may then be
		 * stale, but the first err value is what matters).
		 */
		if (!err)
			ret = fwrite(async->buffer, async->bufsize, 1,
				     md->out);
		if (ret != 1) {
			error("unable to write out cluster: %s",
				strerror(errno));
			err = -errno;
			ret = 0;
		}

		free(async->buffer);
		free(async);
	}

	/* zero unused space in the last block */
	if (!err && bytenr & BLOCK_MASK) {
		size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);

		bytenr += size;
		ret = write_zero(md->out, size);
		if (ret != 1) {
			error("unable to zero out buffer: %s",
				strerror(errno));
			err = -errno;
		}
	}
out:
	*next = bytenr;
	return err;
}
949
950 static int read_data_extent(struct metadump_struct *md,
951                             struct async_work *async)
952 {
953         struct btrfs_root *root = md->root;
954         struct btrfs_fs_info *fs_info = root->fs_info;
955         u64 bytes_left = async->size;
956         u64 logical = async->start;
957         u64 offset = 0;
958         u64 read_len;
959         int num_copies;
960         int cur_mirror;
961         int ret;
962
963         num_copies = btrfs_num_copies(root->fs_info, logical, bytes_left);
964
965         /* Try our best to read data, just like read_tree_block() */
966         for (cur_mirror = 0; cur_mirror < num_copies; cur_mirror++) {
967                 while (bytes_left) {
968                         read_len = bytes_left;
969                         ret = read_extent_data(fs_info,
970                                         (char *)(async->buffer + offset),
971                                         logical, &read_len, cur_mirror);
972                         if (ret < 0)
973                                 break;
974                         offset += read_len;
975                         logical += read_len;
976                         bytes_left -= read_len;
977                 }
978         }
979         if (bytes_left)
980                 return -EIO;
981         return 0;
982 }
983
984 static int get_dev_fd(struct btrfs_root *root)
985 {
986         struct btrfs_device *dev;
987
988         dev = list_first_entry(&root->fs_info->fs_devices->devices,
989                                struct btrfs_device, dev_list);
990         return dev->fd;
991 }
992
993 static int flush_pending(struct metadump_struct *md, int done)
994 {
995         struct async_work *async = NULL;
996         struct extent_buffer *eb;
997         u64 start = 0;
998         u64 size;
999         size_t offset;
1000         int ret = 0;
1001
1002         if (md->pending_size) {
1003                 async = calloc(1, sizeof(*async));
1004                 if (!async)
1005                         return -ENOMEM;
1006
1007                 async->start = md->pending_start;
1008                 async->size = md->pending_size;
1009                 async->bufsize = async->size;
1010                 async->buffer = malloc(async->bufsize);
1011                 if (!async->buffer) {
1012                         free(async);
1013                         return -ENOMEM;
1014                 }
1015                 offset = 0;
1016                 start = async->start;
1017                 size = async->size;
1018
1019                 if (md->data) {
1020                         ret = read_data_extent(md, async);
1021                         if (ret) {
1022                                 free(async->buffer);
1023                                 free(async);
1024                                 return ret;
1025                         }
1026                 }
1027
1028                 /*
1029                  * Balance can make the mapping not cover the super block, so
1030                  * just copy directly from one of the devices.
1031                  */
1032                 if (start == BTRFS_SUPER_INFO_OFFSET) {
1033                         int fd = get_dev_fd(md->root);
1034
1035                         ret = pread64(fd, async->buffer, size, start);
1036                         if (ret < size) {
1037                                 free(async->buffer);
1038                                 free(async);
1039                                 error("unable to read superblock at %llu: %s",
1040                                                 (unsigned long long)start,
1041                                                 strerror(errno));
1042                                 return -errno;
1043                         }
1044                         size = 0;
1045                         ret = 0;
1046                 }
1047
1048                 while (!md->data && size > 0) {
1049                         u64 this_read = min((u64)md->root->fs_info->nodesize,
1050                                         size);
1051
1052                         eb = read_tree_block(md->root->fs_info, start, 0);
1053                         if (!extent_buffer_uptodate(eb)) {
1054                                 free(async->buffer);
1055                                 free(async);
1056                                 error("unable to read metadata block %llu",
1057                                         (unsigned long long)start);
1058                                 return -EIO;
1059                         }
1060                         copy_buffer(md, async->buffer + offset, eb);
1061                         free_extent_buffer(eb);
1062                         start += this_read;
1063                         offset += this_read;
1064                         size -= this_read;
1065                 }
1066
1067                 md->pending_start = (u64)-1;
1068                 md->pending_size = 0;
1069         } else if (!done) {
1070                 return 0;
1071         }
1072
1073         pthread_mutex_lock(&md->mutex);
1074         if (async) {
1075                 list_add_tail(&async->ordered, &md->ordered);
1076                 md->num_items++;
1077                 if (md->compress_level > 0) {
1078                         list_add_tail(&async->list, &md->list);
1079                         pthread_cond_signal(&md->cond);
1080                 } else {
1081                         md->num_ready++;
1082                 }
1083         }
1084         if (md->num_items >= ITEMS_PER_CLUSTER || done) {
1085                 ret = write_buffers(md, &start);
1086                 if (ret)
1087                         error("unable to write buffers: %s", strerror(-ret));
1088                 else
1089                         meta_cluster_init(md, start);
1090         }
1091         pthread_mutex_unlock(&md->mutex);
1092         return ret;
1093 }
1094
1095 static int add_extent(u64 start, u64 size, struct metadump_struct *md,
1096                       int data)
1097 {
1098         int ret;
1099         if (md->data != data ||
1100             md->pending_size + size > MAX_PENDING_SIZE ||
1101             md->pending_start + md->pending_size != start) {
1102                 ret = flush_pending(md, 0);
1103                 if (ret)
1104                         return ret;
1105                 md->pending_start = start;
1106         }
1107         readahead_tree_block(md->root->fs_info, start, 0);
1108         md->pending_size += size;
1109         md->data = data;
1110         return 0;
1111 }
1112
1113 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Tell whether the extent at @bytenr is a tree block, using v0 extent
 * reference items (old filesystems without inline extent backrefs).
 *
 * @path points at the extent item for @bytenr; the slot is advanced past
 * it to inspect the following EXTENT_REF_V0 items of the same extent.
 *
 * Returns 1 if a v0 ref owner objectid is below BTRFS_FIRST_FREE_OBJECTID
 * (i.e. a tree root owns it), 0 for a data extent, negative on error.
 */
static int is_tree_block(struct btrfs_root *extent_root,
			 struct btrfs_path *path, u64 bytenr)
{
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 ref_objectid;
	int ret;

	leaf = path->nodes[0];
	while (1) {
		struct btrfs_extent_ref_v0 *ref_item;
		path->slots[0]++;
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				return ret;
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		/* walked past the items belonging to this extent */
		if (key.objectid != bytenr)
			break;
		if (key.type != BTRFS_EXTENT_REF_V0_KEY)
			continue;
		ref_item = btrfs_item_ptr(leaf, path->slots[0],
					  struct btrfs_extent_ref_v0);
		ref_objectid = btrfs_ref_objectid_v0(leaf, ref_item);
		/* owners below the first free objectid are tree roots */
		if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID)
			return 1;
		break;
	}
	return 0;
}
1148 #endif
1149
1150 static int copy_tree_blocks(struct btrfs_root *root, struct extent_buffer *eb,
1151                             struct metadump_struct *metadump, int root_tree)
1152 {
1153         struct extent_buffer *tmp;
1154         struct btrfs_root_item *ri;
1155         struct btrfs_key key;
1156         struct btrfs_fs_info *fs_info = root->fs_info;
1157         u64 bytenr;
1158         int level;
1159         int nritems = 0;
1160         int i = 0;
1161         int ret;
1162
1163         ret = add_extent(btrfs_header_bytenr(eb), fs_info->nodesize,
1164                          metadump, 0);
1165         if (ret) {
1166                 error("unable to add metadata block %llu: %d",
1167                                 btrfs_header_bytenr(eb), ret);
1168                 return ret;
1169         }
1170
1171         if (btrfs_header_level(eb) == 0 && !root_tree)
1172                 return 0;
1173
1174         level = btrfs_header_level(eb);
1175         nritems = btrfs_header_nritems(eb);
1176         for (i = 0; i < nritems; i++) {
1177                 if (level == 0) {
1178                         btrfs_item_key_to_cpu(eb, &key, i);
1179                         if (key.type != BTRFS_ROOT_ITEM_KEY)
1180                                 continue;
1181                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
1182                         bytenr = btrfs_disk_root_bytenr(eb, ri);
1183                         tmp = read_tree_block(fs_info, bytenr, 0);
1184                         if (!extent_buffer_uptodate(tmp)) {
1185                                 error("unable to read log root block");
1186                                 return -EIO;
1187                         }
1188                         ret = copy_tree_blocks(root, tmp, metadump, 0);
1189                         free_extent_buffer(tmp);
1190                         if (ret)
1191                                 return ret;
1192                 } else {
1193                         bytenr = btrfs_node_blockptr(eb, i);
1194                         tmp = read_tree_block(fs_info, bytenr, 0);
1195                         if (!extent_buffer_uptodate(tmp)) {
1196                                 error("unable to read log root block");
1197                                 return -EIO;
1198                         }
1199                         ret = copy_tree_blocks(root, tmp, metadump, root_tree);
1200                         free_extent_buffer(tmp);
1201                         if (ret)
1202                                 return ret;
1203                 }
1204         }
1205
1206         return 0;
1207 }
1208
1209 static int copy_log_trees(struct btrfs_root *root,
1210                           struct metadump_struct *metadump)
1211 {
1212         u64 blocknr = btrfs_super_log_root(root->fs_info->super_copy);
1213
1214         if (blocknr == 0)
1215                 return 0;
1216
1217         if (!root->fs_info->log_root_tree ||
1218             !root->fs_info->log_root_tree->node) {
1219                 error("unable to copy tree log, it has not been setup");
1220                 return -EIO;
1221         }
1222
1223         return copy_tree_blocks(root, root->fs_info->log_root_tree->node,
1224                                 metadump, 1);
1225 }
1226
/*
 * Add the data extents backing the free space cache files to the dump so
 * the restored image carries a usable cache.  The cache inodes are items
 * of the tree root, so the walk is done over tree_root's EXTENT_DATA
 * items.
 *
 * @path is caller-provided scratch space; it is released here only on the
 * add_extent error path — the caller releases it otherwise.
 */
static int copy_space_cache(struct btrfs_root *root,
			    struct metadump_struct *metadump,
			    struct btrfs_path *path)
{
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 bytenr, num_bytes;
	int ret;

	root = root->fs_info->tree_root;

	key.objectid = 0;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0) {
		error("free space inode not found: %d", ret);
		return ret;
	}

	leaf = path->nodes[0];

	while (1) {
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				error("cannot go to next leaf %d", ret);
				return ret;
			}
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.type != BTRFS_EXTENT_DATA_KEY) {
			path->slots[0]++;
			continue;
		}

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		/* only regular extents are copied, skip inline/prealloc */
		if (btrfs_file_extent_type(leaf, fi) !=
		    BTRFS_FILE_EXTENT_REG) {
			path->slots[0]++;
			continue;
		}

		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
		/* data=1: dump the extent contents verbatim */
		ret = add_extent(bytenr, num_bytes, metadump, 1);
		if (ret) {
			error("unable to add space cache blocks %d", ret);
			btrfs_release_path(path);
			return ret;
		}
		path->slots[0]++;
	}

	return 0;
}
1290
/*
 * Walk the extent tree and queue every tree block for dumping.  Extents
 * whose item carries BTRFS_EXTENT_FLAG_TREE_BLOCK (or, in the v0 compat
 * case, whose back references belong to a tree root) are added as
 * metadata; everything else is skipped.
 *
 * Returns 0 on success, negative errno on failure.  @path is released
 * before returning.
 */
static int copy_from_extent_tree(struct metadump_struct *metadump,
				 struct btrfs_path *path)
{
	struct btrfs_root *extent_root;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_key key;
	u64 bytenr;
	u64 num_bytes;
	int ret;

	extent_root = metadump->root->fs_info->extent_root;
	/* start past the super block, the caller already added it */
	bytenr = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0) {
		error("extent root not found: %d", ret);
		return ret;
	}
	ret = 0;

	leaf = path->nodes[0];

	while (1) {
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0) {
				error("cannot go to next leaf %d", ret);
				break;
			}
			if (ret > 0) {
				ret = 0;
				break;
			}
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid < bytenr ||
		    (key.type != BTRFS_EXTENT_ITEM_KEY &&
		     key.type != BTRFS_METADATA_ITEM_KEY)) {
			path->slots[0]++;
			continue;
		}

		bytenr = key.objectid;
		if (key.type == BTRFS_METADATA_ITEM_KEY) {
			/* skinny metadata: key.offset is the level, not a length */
			num_bytes = extent_root->fs_info->nodesize;
		} else {
			num_bytes = key.offset;
		}

		if (num_bytes == 0) {
			error("extent length 0 at bytenr %llu key type %d",
					(unsigned long long)bytenr, key.type);
			ret = -EIO;
			break;
		}

		/* items larger than the bare extent item carry inline refs */
		if (btrfs_item_size_nr(leaf, path->slots[0]) > sizeof(*ei)) {
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_extent_item);
			if (btrfs_extent_flags(leaf, ei) &
			    BTRFS_EXTENT_FLAG_TREE_BLOCK) {
				ret = add_extent(bytenr, num_bytes, metadump,
						 0);
				if (ret) {
					error("unable to add block %llu: %d",
						(unsigned long long)bytenr, ret);
					break;
				}
			}
		} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
			/* v0 items have no flags; inspect the v0 refs */
			ret = is_tree_block(extent_root, path, bytenr);
			if (ret < 0) {
				error("failed to check tree block %llu: %d",
					(unsigned long long)bytenr, ret);
				break;
			}

			if (ret) {
				ret = add_extent(bytenr, num_bytes, metadump,
						 0);
				if (ret) {
					error("unable to add block %llu: %d",
						(unsigned long long)bytenr, ret);
					break;
				}
			}
			ret = 0;
#else
			error(
	"either extent tree is corrupted or you haven't built with V0 support");
			ret = -EIO;
			break;
#endif
		}
		bytenr += num_bytes;
	}

	btrfs_release_path(path);

	return ret;
}
1399
1400 static int create_metadump(const char *input, FILE *out, int num_threads,
1401                            int compress_level, enum sanitize_mode sanitize,
1402                            int walk_trees)
1403 {
1404         struct btrfs_root *root;
1405         struct btrfs_path path;
1406         struct metadump_struct metadump;
1407         int ret;
1408         int err = 0;
1409
1410         root = open_ctree(input, 0, 0);
1411         if (!root) {
1412                 error("open ctree failed");
1413                 return -EIO;
1414         }
1415
1416         ret = metadump_init(&metadump, root, out, num_threads,
1417                             compress_level, sanitize);
1418         if (ret) {
1419                 error("failed to initialize metadump: %d", ret);
1420                 close_ctree(root);
1421                 return ret;
1422         }
1423
1424         ret = add_extent(BTRFS_SUPER_INFO_OFFSET, BTRFS_SUPER_INFO_SIZE,
1425                         &metadump, 0);
1426         if (ret) {
1427                 error("unable to add metadata: %d", ret);
1428                 err = ret;
1429                 goto out;
1430         }
1431
1432         btrfs_init_path(&path);
1433
1434         if (walk_trees) {
1435                 ret = copy_tree_blocks(root, root->fs_info->chunk_root->node,
1436                                        &metadump, 1);
1437                 if (ret) {
1438                         err = ret;
1439                         goto out;
1440                 }
1441
1442                 ret = copy_tree_blocks(root, root->fs_info->tree_root->node,
1443                                        &metadump, 1);
1444                 if (ret) {
1445                         err = ret;
1446                         goto out;
1447                 }
1448         } else {
1449                 ret = copy_from_extent_tree(&metadump, &path);
1450                 if (ret) {
1451                         err = ret;
1452                         goto out;
1453                 }
1454         }
1455
1456         ret = copy_log_trees(root, &metadump);
1457         if (ret) {
1458                 err = ret;
1459                 goto out;
1460         }
1461
1462         ret = copy_space_cache(root, &metadump, &path);
1463 out:
1464         ret = flush_pending(&metadump, 1);
1465         if (ret) {
1466                 if (!err)
1467                         err = ret;
1468                 error("failed to flush pending data: %d", ret);
1469         }
1470
1471         metadump_destroy(&metadump, num_threads);
1472
1473         btrfs_release_path(&path);
1474         ret = close_ctree(root);
1475         return err ? err : ret;
1476 }
1477
1478 static void update_super_old(u8 *buffer)
1479 {
1480         struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
1481         struct btrfs_chunk *chunk;
1482         struct btrfs_disk_key *key;
1483         u32 sectorsize = btrfs_super_sectorsize(super);
1484         u64 flags = btrfs_super_flags(super);
1485
1486         flags |= BTRFS_SUPER_FLAG_METADUMP;
1487         btrfs_set_super_flags(super, flags);
1488
1489         key = (struct btrfs_disk_key *)(super->sys_chunk_array);
1490         chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
1491                                        sizeof(struct btrfs_disk_key));
1492
1493         btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
1494         btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
1495         btrfs_set_disk_key_offset(key, 0);
1496
1497         btrfs_set_stack_chunk_length(chunk, (u64)-1);
1498         btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
1499         btrfs_set_stack_chunk_stripe_len(chunk, BTRFS_STRIPE_LEN);
1500         btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
1501         btrfs_set_stack_chunk_io_align(chunk, sectorsize);
1502         btrfs_set_stack_chunk_io_width(chunk, sectorsize);
1503         btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
1504         btrfs_set_stack_chunk_num_stripes(chunk, 1);
1505         btrfs_set_stack_chunk_sub_stripes(chunk, 0);
1506         chunk->stripe.devid = super->dev_item.devid;
1507         btrfs_set_stack_stripe_offset(&chunk->stripe, 0);
1508         memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
1509         btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
1510         csum_block(buffer, BTRFS_SUPER_INFO_SIZE);
1511 }
1512
/*
 * Rewrite the super block's sys chunk array for a restored image.  Every
 * chunk is reduced to stripes on the single target device (non-DUP
 * profiles become a single SYSTEM stripe, DUP keeps two), and stripe
 * offsets are remapped from logical to physical via the chunk mapping in
 * @mdres.  Finally the METADUMP_V2 flag, new array size and a device
 * count of 1 are stored and the block is re-checksummed.
 *
 * Returns 0 on success, -EIO on a malformed sys array.
 */
static int update_super(struct mdrestore_struct *mdres, u8 *buffer)
{
	struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
	struct btrfs_chunk *chunk;
	struct btrfs_disk_key *disk_key;
	struct btrfs_key key;
	u64 flags = btrfs_super_flags(super);
	u32 new_array_size = 0;
	u32 array_size;
	u32 cur = 0;
	u8 *ptr, *write_ptr;
	int old_num_stripes;

	write_ptr = ptr = super->sys_chunk_array;
	array_size = btrfs_super_sys_array_size(super);

	/* compact the array in place: read via ptr, write via write_ptr */
	while (cur < array_size) {
		disk_key = (struct btrfs_disk_key *)ptr;
		btrfs_disk_key_to_cpu(&key, disk_key);

		new_array_size += sizeof(*disk_key);
		memmove(write_ptr, ptr, sizeof(*disk_key));

		write_ptr += sizeof(*disk_key);
		ptr += sizeof(*disk_key);
		cur += sizeof(*disk_key);

		if (key.type == BTRFS_CHUNK_ITEM_KEY) {
			u64 type, physical, physical_dup, size = 0;

			/* stripe count must be read before the memmove */
			chunk = (struct btrfs_chunk *)ptr;
			old_num_stripes = btrfs_stack_chunk_num_stripes(chunk);
			chunk = (struct btrfs_chunk *)write_ptr;

			memmove(write_ptr, ptr, sizeof(*chunk));
			btrfs_set_stack_chunk_sub_stripes(chunk, 0);
			type = btrfs_stack_chunk_type(chunk);
			if (type & BTRFS_BLOCK_GROUP_DUP) {
				/* DUP keeps its second stripe */
				new_array_size += sizeof(struct btrfs_stripe);
				write_ptr += sizeof(struct btrfs_stripe);
			} else {
				/* every other profile becomes one stripe */
				btrfs_set_stack_chunk_num_stripes(chunk, 1);
				btrfs_set_stack_chunk_type(chunk,
						BTRFS_BLOCK_GROUP_SYSTEM);
			}
			chunk->stripe.devid = super->dev_item.devid;
			physical = logical_to_physical(mdres, key.offset,
						       &size, &physical_dup);
			if (size != (u64)-1)
				btrfs_set_stack_stripe_offset(&chunk->stripe,
							      physical);
			memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid,
			       BTRFS_UUID_SIZE);
			new_array_size += sizeof(*chunk);
		} else {
			error("bogus key in the sys array %d", key.type);
			return -EIO;
		}
		write_ptr += sizeof(*chunk);
		/* advance the read side by the original multi-stripe size */
		ptr += btrfs_chunk_item_size(old_num_stripes);
		cur += btrfs_chunk_item_size(old_num_stripes);
	}

	if (mdres->clear_space_cache)
		btrfs_set_super_cache_generation(super, 0);

	flags |= BTRFS_SUPER_FLAG_METADUMP_V2;
	btrfs_set_super_flags(super, flags);
	btrfs_set_super_sys_array_size(super, new_array_size);
	btrfs_set_super_num_devices(super, 1);
	csum_block(buffer, BTRFS_SUPER_INFO_SIZE);

	return 0;
}
1587
1588 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size)
1589 {
1590         struct extent_buffer *eb;
1591
1592         eb = calloc(1, sizeof(struct extent_buffer) + size);
1593         if (!eb)
1594                 return NULL;
1595
1596         eb->start = bytenr;
1597         eb->len = size;
1598         return eb;
1599 }
1600
/*
 * Shrink the item at @slot to @new_size bytes, moving the data of this
 * and all following items towards the end of the leaf (leaf data grows
 * downwards from the end, mirroring the kernel's btrfs_truncate_item).
 * No-op when the item already has the requested size.
 */
static void truncate_item(struct extent_buffer *eb, int slot, u32 new_size)
{
	struct btrfs_item *item;
	u32 nritems;
	u32 old_size;
	u32 old_data_start;
	u32 size_diff;
	u32 data_end;
	int i;

	old_size = btrfs_item_size_nr(eb, slot);
	if (old_size == new_size)
		return;

	nritems = btrfs_header_nritems(eb);
	/* the last item's offset marks the start of the data area */
	data_end = btrfs_item_offset_nr(eb, nritems - 1);

	old_data_start = btrfs_item_offset_nr(eb, slot);
	size_diff = old_size - new_size;

	/* shift the data offsets of this item and everything after it */
	for (i = slot; i < nritems; i++) {
		u32 ioff;
		item = btrfs_item_nr(i);
		ioff = btrfs_item_offset(eb, item);
		btrfs_set_item_offset(eb, item, ioff + size_diff);
	}

	/* close the gap by moving the data area up by size_diff bytes */
	memmove_extent_buffer(eb, btrfs_leaf_data(eb) + data_end + size_diff,
			      btrfs_leaf_data(eb) + data_end,
			      old_data_start + new_size - data_end);
	item = btrfs_item_nr(slot);
	btrfs_set_item_size(eb, item, new_size);
}
1634
/*
 * Rewrite the chunk tree blocks contained in @buffer so they describe
 * the single device being restored onto.
 *
 * @async->start is the logical address of the first block; @buffer
 * holds @size bytes of (already decompressed) metadata.  For every
 * chunk-tree leaf found, each chunk item is truncated to a single
 * stripe (two for DUP), its RAID profile bits are cleared, and the
 * stripe devid/offset/uuid are pointed at the restore target.  The
 * checksum of each modified block is recomputed.
 *
 * Returns 0 on success (including the no-op cases) or -ENOMEM.
 */
static int fixup_chunk_tree_block(struct mdrestore_struct *mdres,
				  struct async_work *async, u8 *buffer,
				  size_t size)
{
	struct extent_buffer *eb;
	size_t size_left = size;
	u64 bytenr = async->start;
	int i;

	/* Not a run of whole tree blocks: nothing to fix up here */
	if (size_left % mdres->nodesize)
		return 0;

	eb = alloc_dummy_eb(bytenr, mdres->nodesize);
	if (!eb)
		return -ENOMEM;

	while (size_left) {
		eb->start = bytenr;
		memcpy(eb->data, buffer, mdres->nodesize);

		/*
		 * A header that doesn't match this position or filesystem
		 * means we ran off the end of valid metadata; stop scanning.
		 */
		if (btrfs_header_bytenr(eb) != bytenr)
			break;
		if (memcmp(mdres->fsid,
			   eb->data + offsetof(struct btrfs_header, fsid),
			   BTRFS_FSID_SIZE))
			break;

		/* Only chunk tree leaves need rewriting */
		if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID)
			goto next;

		if (btrfs_header_level(eb) != 0)
			goto next;

		for (i = 0; i < btrfs_header_nritems(eb); i++) {
			struct btrfs_chunk *chunk;
			struct btrfs_key key;
			u64 type, physical, physical_dup, size = (u64)-1;

			btrfs_item_key_to_cpu(eb, &key, i);
			if (key.type != BTRFS_CHUNK_ITEM_KEY)
				continue;

			/* size is in/out: 0 asks for the full mapping */
			size = 0;
			physical = logical_to_physical(mdres, key.offset,
						       &size, &physical_dup);

			/* Non-DUP chunks collapse to a single stripe */
			if (!physical_dup)
				truncate_item(eb, i, sizeof(*chunk));
			chunk = btrfs_item_ptr(eb, i, struct btrfs_chunk);


			/* Zero out the RAID profile */
			type = btrfs_chunk_type(eb, chunk);
			type &= (BTRFS_BLOCK_GROUP_DATA |
				 BTRFS_BLOCK_GROUP_SYSTEM |
				 BTRFS_BLOCK_GROUP_METADATA |
				 BTRFS_BLOCK_GROUP_DUP);
			btrfs_set_chunk_type(eb, chunk, type);

			if (!physical_dup)
				btrfs_set_chunk_num_stripes(eb, chunk, 1);
			btrfs_set_chunk_sub_stripes(eb, chunk, 0);
			btrfs_set_stripe_devid_nr(eb, chunk, 0, mdres->devid);
			/* size stays (u64)-1 if the mapping lookup found nothing */
			if (size != (u64)-1)
				btrfs_set_stripe_offset_nr(eb, chunk, 0,
							   physical);
			/* update stripe 2 offset */
			if (physical_dup)
				btrfs_set_stripe_offset_nr(eb, chunk, 1,
							   physical_dup);

			write_extent_buffer(eb, mdres->uuid,
					(unsigned long)btrfs_stripe_dev_uuid_nr(
						chunk, 0),
					BTRFS_UUID_SIZE);
		}
		/* Copy the rewritten block back and redo its checksum */
		memcpy(buffer, eb->data, eb->len);
		csum_block(buffer, eb->len);
next:
		size_left -= mdres->nodesize;
		buffer += mdres->nodesize;
		bytenr += mdres->nodesize;
	}

	free(eb);
	return 0;
}
1722
1723 static void write_backup_supers(int fd, u8 *buf)
1724 {
1725         struct btrfs_super_block *super = (struct btrfs_super_block *)buf;
1726         struct stat st;
1727         u64 size;
1728         u64 bytenr;
1729         int i;
1730         int ret;
1731
1732         if (fstat(fd, &st)) {
1733                 error(
1734         "cannot stat restore point, won't be able to write backup supers: %s",
1735                         strerror(errno));
1736                 return;
1737         }
1738
1739         size = btrfs_device_size(fd, &st);
1740
1741         for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1742                 bytenr = btrfs_sb_offset(i);
1743                 if (bytenr + BTRFS_SUPER_INFO_SIZE > size)
1744                         break;
1745                 btrfs_set_super_bytenr(super, bytenr);
1746                 csum_block(buf, BTRFS_SUPER_INFO_SIZE);
1747                 ret = pwrite64(fd, buf, BTRFS_SUPER_INFO_SIZE, bytenr);
1748                 if (ret < BTRFS_SUPER_INFO_SIZE) {
1749                         if (ret < 0)
1750                                 error(
1751                                 "problem writing out backup super block %d: %s",
1752                                                 i, strerror(errno));
1753                         else
1754                                 error("short write writing out backup super block");
1755                         break;
1756                 }
1757         }
1758 }
1759
/*
 * Restore worker thread body.
 *
 * Pulls async_work items off mdres->list, optionally decompresses
 * them, fixes up the super block / chunk tree blocks for a
 * single-device restore, and writes the payload to its physical
 * location(s) on the output.  Exits when mdres->done is set and the
 * queue is drained.
 */
static void *restore_worker(void *data)
{
	struct mdrestore_struct *mdres = (struct mdrestore_struct *)data;
	struct async_work *async;
	size_t size;
	u8 *buffer;
	u8 *outbuf;
	int outfd;
	int ret;
	int compress_size = MAX_PENDING_SIZE * 4;

	outfd = fileno(mdres->out);
	buffer = malloc(compress_size);
	if (!buffer) {
		error("not enough memory for restore worker buffer");
		pthread_mutex_lock(&mdres->mutex);
		if (!mdres->error)
			mdres->error = -ENOMEM;
		pthread_mutex_unlock(&mdres->mutex);
		pthread_exit(NULL);
	}

	while (1) {
		u64 bytenr, physical_dup;
		off_t offset = 0;
		int err = 0;

		/*
		 * Wait until the restore state is initialized (nodesize is
		 * set by the first super block) and there is queued work.
		 */
		pthread_mutex_lock(&mdres->mutex);
		while (!mdres->nodesize || list_empty(&mdres->list)) {
			if (mdres->done) {
				pthread_mutex_unlock(&mdres->mutex);
				goto out;
			}
			pthread_cond_wait(&mdres->cond, &mdres->mutex);
		}
		async = list_entry(mdres->list.next, struct async_work, list);
		list_del_init(&async->list);

		if (mdres->compress_method == COMPRESS_ZLIB) {
			size = compress_size;
			/* Drop the lock around the expensive decompression */
			pthread_mutex_unlock(&mdres->mutex);
			ret = uncompress(buffer, (unsigned long *)&size,
					 async->buffer, async->bufsize);
			pthread_mutex_lock(&mdres->mutex);
			if (ret != Z_OK) {
				error("decompression failed with %d", ret);
				err = -EIO;
			}
			outbuf = buffer;
		} else {
			outbuf = async->buffer;
			size = async->bufsize;
		}

		/*
		 * A plain single-device restore needs the metadata rewritten
		 * to point at the target device before it is written out.
		 */
		if (!mdres->multi_devices) {
			if (async->start == BTRFS_SUPER_INFO_OFFSET) {
				if (mdres->old_restore) {
					update_super_old(outbuf);
				} else {
					ret = update_super(mdres, outbuf);
					if (ret)
						err = ret;
				}
			} else if (!mdres->old_restore) {
				ret = fixup_chunk_tree_block(mdres, async, outbuf, size);
				if (ret)
					err = ret;
			}
		}

		if (!mdres->fixup_offset) {
			while (size) {
				u64 chunk_size = size;
				physical_dup = 0;
				/* Map the logical range to a device offset */
				if (!mdres->multi_devices && !mdres->old_restore)
					bytenr = logical_to_physical(mdres,
						     async->start + offset,
						     &chunk_size,
						     &physical_dup);
				else
					bytenr = async->start + offset;

				ret = pwrite64(outfd, outbuf+offset, chunk_size,
					       bytenr);
				if (ret != chunk_size)
					goto error;

				/* DUP chunks carry a second physical copy */
				if (physical_dup)
					ret = pwrite64(outfd, outbuf+offset,
						       chunk_size,
						       physical_dup);
				if (ret != chunk_size)
					goto error;

				size -= chunk_size;
				offset += chunk_size;
				continue;

error:
				/*
				 * NOTE(review): size/offset are not advanced
				 * on this path, so a persistently failing
				 * pwrite makes the while (size) loop retry
				 * the same write forever — looks like a
				 * missing break; confirm intended behavior.
				 * Also err is positive errno here while the
				 * other error paths use negative codes.
				 */
				if (ret < 0) {
					error("unable to write to device: %s",
							strerror(errno));
					err = errno;
				} else {
					error("short write");
					err = -EIO;
				}
			}
		} else if (async->start != BTRFS_SUPER_INFO_OFFSET) {
			ret = write_data_to_disk(mdres->info, outbuf, async->start, size, 0);
			if (ret) {
				error("failed to write data");
				exit(1);
			}
		}


		/* backup super blocks are already there at fixup_offset stage */
		if (!mdres->multi_devices && async->start == BTRFS_SUPER_INFO_OFFSET)
			write_backup_supers(outfd, outbuf);

		/* Still holding the mutex here: record error and progress */
		if (err && !mdres->error)
			mdres->error = err;
		mdres->num_items--;
		pthread_mutex_unlock(&mdres->mutex);

		free(async->buffer);
		free(async);
	}
out:
	free(buffer);
	pthread_exit(NULL);
}
1893
1894 static void mdrestore_destroy(struct mdrestore_struct *mdres, int num_threads)
1895 {
1896         struct rb_node *n;
1897         int i;
1898
1899         while ((n = rb_first(&mdres->chunk_tree))) {
1900                 struct fs_chunk *entry;
1901
1902                 entry = rb_entry(n, struct fs_chunk, l);
1903                 rb_erase(n, &mdres->chunk_tree);
1904                 rb_erase(&entry->p, &mdres->physical_tree);
1905                 free(entry);
1906         }
1907         pthread_mutex_lock(&mdres->mutex);
1908         mdres->done = 1;
1909         pthread_cond_broadcast(&mdres->cond);
1910         pthread_mutex_unlock(&mdres->mutex);
1911
1912         for (i = 0; i < num_threads; i++)
1913                 pthread_join(mdres->threads[i], NULL);
1914
1915         pthread_cond_destroy(&mdres->cond);
1916         pthread_mutex_destroy(&mdres->mutex);
1917 }
1918
1919 static int mdrestore_init(struct mdrestore_struct *mdres,
1920                           FILE *in, FILE *out, int old_restore,
1921                           int num_threads, int fixup_offset,
1922                           struct btrfs_fs_info *info, int multi_devices)
1923 {
1924         int i, ret = 0;
1925
1926         memset(mdres, 0, sizeof(*mdres));
1927         pthread_cond_init(&mdres->cond, NULL);
1928         pthread_mutex_init(&mdres->mutex, NULL);
1929         INIT_LIST_HEAD(&mdres->list);
1930         INIT_LIST_HEAD(&mdres->overlapping_chunks);
1931         mdres->in = in;
1932         mdres->out = out;
1933         mdres->old_restore = old_restore;
1934         mdres->chunk_tree.rb_node = NULL;
1935         mdres->fixup_offset = fixup_offset;
1936         mdres->info = info;
1937         mdres->multi_devices = multi_devices;
1938         mdres->clear_space_cache = 0;
1939         mdres->last_physical_offset = 0;
1940         mdres->alloced_chunks = 0;
1941
1942         if (!num_threads)
1943                 return 0;
1944
1945         mdres->num_threads = num_threads;
1946         for (i = 0; i < num_threads; i++) {
1947                 ret = pthread_create(&mdres->threads[i], NULL, restore_worker,
1948                                      mdres);
1949                 if (ret) {
1950                         /* pthread_create returns errno directly */
1951                         ret = -ret;
1952                         break;
1953                 }
1954         }
1955         if (ret)
1956                 mdrestore_destroy(mdres, i + 1);
1957         return ret;
1958 }
1959
/*
 * Initialize the global restore state (nodesize, fsid, device uuid,
 * devid) from the super block carried in @async.  Decompresses the
 * payload first when the image is zlib-compressed.  A no-op once the
 * state has been filled in.
 *
 * Returns 0 on success, -ENOMEM or -EIO on failure.
 */
static int fill_mdres_info(struct mdrestore_struct *mdres,
			   struct async_work *async)
{
	struct btrfs_super_block *super;
	u8 *zlib_buf = NULL;
	u8 *data;
	int ret;

	/* Already initialized by an earlier super block item. */
	if (mdres->nodesize)
		return 0;

	if (mdres->compress_method != COMPRESS_ZLIB) {
		data = async->buffer;
	} else {
		size_t len = MAX_PENDING_SIZE * 2;

		zlib_buf = malloc(MAX_PENDING_SIZE * 2);
		if (!zlib_buf)
			return -ENOMEM;
		ret = uncompress(zlib_buf, (unsigned long *)&len,
				 async->buffer, async->bufsize);
		if (ret != Z_OK) {
			error("decompression failed with %d", ret);
			free(zlib_buf);
			return -EIO;
		}
		data = zlib_buf;
	}

	super = (struct btrfs_super_block *)data;
	mdres->nodesize = btrfs_super_nodesize(super);
	memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
	memcpy(mdres->uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
	mdres->devid = le64_to_cpu(super->dev_item.devid);
	free(zlib_buf);
	return 0;
}
1999
/*
 * Queue the payloads of one metadump cluster for restore.
 *
 * @cluster holds a cluster header plus item table already read from
 * mdres->in; the item payloads follow in the stream.  Each payload is
 * read into a freshly allocated async_work and appended to
 * mdres->list for the worker threads.  The first super block item
 * seen also initializes the restore state via fill_mdres_info().  On
 * return *next holds the stream offset of the next cluster header.
 *
 * Returns 0 on success, -ENOMEM or -EIO on failure.
 */
static int add_cluster(struct meta_cluster *cluster,
		       struct mdrestore_struct *mdres, u64 *next)
{
	struct meta_cluster_item *item;
	struct meta_cluster_header *header = &cluster->header;
	struct async_work *async;
	u64 bytenr;
	u32 i, nritems;
	int ret;

	pthread_mutex_lock(&mdres->mutex);
	mdres->compress_method = header->compress;
	pthread_mutex_unlock(&mdres->mutex);

	/* Payloads start right after the BLOCK_SIZE cluster header */
	bytenr = le64_to_cpu(header->bytenr) + BLOCK_SIZE;
	nritems = le32_to_cpu(header->nritems);
	for (i = 0; i < nritems; i++) {
		item = &cluster->items[i];
		async = calloc(1, sizeof(*async));
		if (!async) {
			error("not enough memory for async data");
			return -ENOMEM;
		}
		async->start = le64_to_cpu(item->bytenr);
		async->bufsize = le32_to_cpu(item->size);
		async->buffer = malloc(async->bufsize);
		if (!async->buffer) {
			error("not enough memory for async buffer");
			free(async);
			return -ENOMEM;
		}
		ret = fread(async->buffer, async->bufsize, 1, mdres->in);
		if (ret != 1) {
			error("unable to read buffer: %s", strerror(errno));
			free(async->buffer);
			free(async);
			return -EIO;
		}
		bytenr += async->bufsize;

		pthread_mutex_lock(&mdres->mutex);
		if (async->start == BTRFS_SUPER_INFO_OFFSET) {
			/*
			 * The super block carries the nodesize/fsid/devid the
			 * workers need; set the state up before queueing it.
			 */
			ret = fill_mdres_info(mdres, async);
			if (ret) {
				error("unable to set up restore state");
				pthread_mutex_unlock(&mdres->mutex);
				free(async->buffer);
				free(async);
				return ret;
			}
		}
		list_add_tail(&async->list, &mdres->list);
		mdres->num_items++;
		pthread_cond_signal(&mdres->cond);
		pthread_mutex_unlock(&mdres->mutex);
	}
	/*
	 * Consume the zero padding up to the next BLOCK_SIZE boundary;
	 * size is at most BLOCK_MASK so the stack buffer is large enough.
	 */
	if (bytenr & BLOCK_MASK) {
		char buffer[BLOCK_MASK];
		size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);

		bytenr += size;
		ret = fread(buffer, size, 1, mdres->in);
		if (ret != 1) {
			error("failed to read buffer: %s", strerror(errno));
			return -EIO;
		}
	}
	*next = bytenr;
	return 0;
}
2070
2071 static int wait_for_worker(struct mdrestore_struct *mdres)
2072 {
2073         int ret = 0;
2074
2075         pthread_mutex_lock(&mdres->mutex);
2076         ret = mdres->error;
2077         while (!ret && mdres->num_items > 0) {
2078                 struct timespec ts = {
2079                         .tv_sec = 0,
2080                         .tv_nsec = 10000000,
2081                 };
2082                 pthread_mutex_unlock(&mdres->mutex);
2083                 nanosleep(&ts, NULL);
2084                 pthread_mutex_lock(&mdres->mutex);
2085                 ret = mdres->error;
2086         }
2087         pthread_mutex_unlock(&mdres->mutex);
2088         return ret;
2089 }
2090
/*
 * Parse the chunk tree block at logical @bytenr out of a cluster item
 * payload and record its chunk items for logical->physical mapping.
 *
 * @buffer holds @bufsize bytes of decompressed payload whose first
 * block sits at logical @item_bytenr; the caller guarantees @bytenr
 * falls inside that range.  Leaf chunk items are inserted into
 * mdres->chunk_tree and mdres->physical_tree; interior nodes recurse
 * through search_for_chunk_blocks() (starting at @cluster_bytenr) for
 * each child pointer.  NOTE(review): @bufsize itself is not used in
 * the body — presumably kept for symmetry with the caller; confirm.
 *
 * Returns 0 on success, -EIO on validation failure, -ENOMEM on
 * allocation failure.
 */
static int read_chunk_block(struct mdrestore_struct *mdres, u8 *buffer,
			    u64 bytenr, u64 item_bytenr, u32 bufsize,
			    u64 cluster_bytenr)
{
	struct extent_buffer *eb;
	int ret = 0;
	int i;

	eb = alloc_dummy_eb(bytenr, mdres->nodesize);
	if (!eb) {
		ret = -ENOMEM;
		goto out;
	}

	/* Step forward to the block we were asked for within the payload */
	while (item_bytenr != bytenr) {
		buffer += mdres->nodesize;
		item_bytenr += mdres->nodesize;
	}

	memcpy(eb->data, buffer, mdres->nodesize);
	/* Sanity-check the header: position, filesystem and owner tree */
	if (btrfs_header_bytenr(eb) != bytenr) {
		error("eb bytenr does not match found bytenr: %llu != %llu",
				(unsigned long long)btrfs_header_bytenr(eb),
				(unsigned long long)bytenr);
		ret = -EIO;
		goto out;
	}

	if (memcmp(mdres->fsid, eb->data + offsetof(struct btrfs_header, fsid),
		   BTRFS_FSID_SIZE)) {
		error("filesystem UUID of eb %llu does not match",
				(unsigned long long)bytenr);
		ret = -EIO;
		goto out;
	}

	if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID) {
		error("wrong eb %llu owner %llu",
				(unsigned long long)bytenr,
				(unsigned long long)btrfs_header_owner(eb));
		ret = -EIO;
		goto out;
	}

	for (i = 0; i < btrfs_header_nritems(eb); i++) {
		struct btrfs_chunk *chunk;
		struct fs_chunk *fs_chunk;
		struct btrfs_key key;
		u64 type;

		/* Interior node: chase each child block pointer */
		if (btrfs_header_level(eb)) {
			u64 blockptr = btrfs_node_blockptr(eb, i);

			ret = search_for_chunk_blocks(mdres, blockptr,
						      cluster_bytenr);
			if (ret)
				break;
			continue;
		}

		/* Yay a leaf!  We loves leafs! */
		btrfs_item_key_to_cpu(eb, &key, i);
		if (key.type != BTRFS_CHUNK_ITEM_KEY)
			continue;

		fs_chunk = malloc(sizeof(struct fs_chunk));
		if (!fs_chunk) {
			error("not enough memory to allocate chunk");
			ret = -ENOMEM;
			break;
		}
		memset(fs_chunk, 0, sizeof(*fs_chunk));
		chunk = btrfs_item_ptr(eb, i, struct btrfs_chunk);

		fs_chunk->logical = key.offset;
		fs_chunk->physical = btrfs_stripe_offset_nr(eb, chunk, 0);
		fs_chunk->bytes = btrfs_chunk_length(eb, chunk);
		INIT_LIST_HEAD(&fs_chunk->list);
		/*
		 * Chunks whose physical range collides with an existing one
		 * are kept on a side list instead of the physical tree.
		 */
		if (tree_search(&mdres->physical_tree, &fs_chunk->p,
				physical_cmp, 1) != NULL)
			list_add(&fs_chunk->list, &mdres->overlapping_chunks);
		else
			tree_insert(&mdres->physical_tree, &fs_chunk->p,
				    physical_cmp);

		/* DUP chunks have a second stripe holding the copy */
		type = btrfs_chunk_type(eb, chunk);
		if (type & BTRFS_BLOCK_GROUP_DUP) {
			fs_chunk->physical_dup =
					btrfs_stripe_offset_nr(eb, chunk, 1);
		}

		/* Track the highest physical offset any stripe reaches */
		if (fs_chunk->physical_dup + fs_chunk->bytes >
		    mdres->last_physical_offset)
			mdres->last_physical_offset = fs_chunk->physical_dup +
				fs_chunk->bytes;
		else if (fs_chunk->physical + fs_chunk->bytes >
		    mdres->last_physical_offset)
			mdres->last_physical_offset = fs_chunk->physical +
				fs_chunk->bytes;
		mdres->alloced_chunks += fs_chunk->bytes;
		/* in dup case, fs_chunk->bytes should add twice */
		if (fs_chunk->physical_dup)
			mdres->alloced_chunks += fs_chunk->bytes;
		tree_insert(&mdres->chunk_tree, &fs_chunk->l, chunk_cmp);
	}
out:
	free(eb);
	return ret;
}
2200
/* If you have to ask you aren't worthy */
/*
 * Scan the whole image stream for the chunk tree block at logical
 * @search.
 *
 * Starting from the cluster at @cluster_bytenr, walk cluster by
 * cluster through the image, decompressing item payloads as needed,
 * until an item's logical range covers @search; that payload is then
 * handed to read_chunk_block(), which may recurse back here for child
 * node pointers.  If the end of the image is reached without a hit,
 * the scan wraps once to the start of the image.
 *
 * Returns 0 on success (block found and parsed) or a negative errno.
 */
static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
				   u64 search, u64 cluster_bytenr)
{
	struct meta_cluster *cluster;
	struct meta_cluster_header *header;
	struct meta_cluster_item *item;
	u64 current_cluster = cluster_bytenr, bytenr;
	u64 item_bytenr;
	u32 bufsize, nritems, i;
	u32 max_size = MAX_PENDING_SIZE * 2;
	u8 *buffer, *tmp = NULL;
	int ret = 0;

	cluster = malloc(BLOCK_SIZE);
	if (!cluster) {
		error("not enough memory for cluster");
		return -ENOMEM;
	}

	buffer = malloc(max_size);
	if (!buffer) {
		error("not enough memory for buffer");
		free(cluster);
		return -ENOMEM;
	}

	/* Scratch buffer for the compressed payload, if needed */
	if (mdres->compress_method == COMPRESS_ZLIB) {
		tmp = malloc(max_size);
		if (!tmp) {
			error("not enough memory for buffer");
			free(cluster);
			free(buffer);
			return -ENOMEM;
		}
	}

	bytenr = current_cluster;
	while (1) {
		if (fseek(mdres->in, current_cluster, SEEK_SET)) {
			error("seek failed: %s", strerror(errno));
			ret = -EIO;
			break;
		}

		ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
		if (ret == 0) {
			/*
			 * Hit EOF: wrap to the start of the image once; a
			 * second EOF from offset 0 means corrupted data.
			 */
			if (cluster_bytenr != 0) {
				cluster_bytenr = 0;
				current_cluster = 0;
				bytenr = 0;
				continue;
			}
			error(
	"unknown state after reading cluster at %llu, probably corrupted data",
					cluster_bytenr);
			ret = -EIO;
			break;
		} else if (ret < 0) {
			/*
			 * NOTE(review): fread() returns size_t and never
			 * goes negative, so this branch looks unreachable.
			 */
			error("unable to read image at %llu: %s",
					(unsigned long long)cluster_bytenr,
					strerror(errno));
			break;
		}
		ret = 0;

		header = &cluster->header;
		if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
		    le64_to_cpu(header->bytenr) != current_cluster) {
			error("bad header in metadump image");
			ret = -EIO;
			break;
		}

		/* Item payloads follow the BLOCK_SIZE cluster header */
		bytenr += BLOCK_SIZE;
		nritems = le32_to_cpu(header->nritems);
		for (i = 0; i < nritems; i++) {
			size_t size;

			item = &cluster->items[i];
			bufsize = le32_to_cpu(item->size);
			item_bytenr = le64_to_cpu(item->bytenr);

			if (bufsize > max_size) {
				error("item %u too big: %u > %u", i, bufsize,
						max_size);
				ret = -EIO;
				break;
			}

			if (mdres->compress_method == COMPRESS_ZLIB) {
				ret = fread(tmp, bufsize, 1, mdres->in);
				if (ret != 1) {
					error("read error: %s", strerror(errno));
					ret = -EIO;
					break;
				}

				size = max_size;
				ret = uncompress(buffer,
						 (unsigned long *)&size, tmp,
						 bufsize);
				if (ret != Z_OK) {
					error("decompression failed with %d",
							ret);
					ret = -EIO;
					break;
				}
			} else {
				ret = fread(buffer, bufsize, 1, mdres->in);
				if (ret != 1) {
					error("read error: %s",
							strerror(errno));
					ret = -EIO;
					break;
				}
				size = bufsize;
			}
			ret = 0;

			/* Does this item's logical range cover @search? */
			if (item_bytenr <= search &&
			    item_bytenr + size > search) {
				ret = read_chunk_block(mdres, buffer, search,
						       item_bytenr, size,
						       current_cluster);
				/* ret == 1 signals "found" to the outer loop */
				if (!ret)
					ret = 1;
				break;
			}
			bytenr += bufsize;
		}
		if (ret) {
			/* Found (1) becomes success; real errors stay */
			if (ret > 0)
				ret = 0;
			break;
		}
		/* Clusters are BLOCK_SIZE aligned: skip the padding */
		if (bytenr & BLOCK_MASK)
			bytenr += BLOCK_SIZE - (bytenr & BLOCK_MASK);
		current_cluster = bytenr;
	}

	free(tmp);
	free(buffer);
	free(cluster);
	return ret;
}
2347
/*
 * Read the first cluster of the image, locate the superblock copy inside
 * it, extract the basic filesystem geometry (fsid, device uuid/devid,
 * nodesize, chunk root location) and then scan the image for all chunk
 * tree blocks so logical addresses can be mapped back to physical.
 *
 * Skipped (returns 0) when reading from stdin, which cannot seek.
 * Returns 0 on success, negative errno on failure.
 */
static int build_chunk_tree(struct mdrestore_struct *mdres,
			    struct meta_cluster *cluster)
{
	struct btrfs_super_block *super;
	struct meta_cluster_header *header;
	struct meta_cluster_item *item = NULL;
	u64 chunk_root_bytenr = 0;
	u32 i, nritems;
	u64 bytenr = 0;
	u8 *buffer;
	int ret;

	/* We can't seek with stdin so don't bother doing this */
	if (mdres->in == stdin)
		return 0;

	ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
	if (ret <= 0) {
		error("unable to read cluster: %s", strerror(errno));
		return -EIO;
	}
	ret = 0;

	header = &cluster->header;
	if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
	    le64_to_cpu(header->bytenr) != 0) {
		error("bad header in metadump image");
		return -EIO;
	}

	bytenr += BLOCK_SIZE;
	mdres->compress_method = header->compress;
	nritems = le32_to_cpu(header->nritems);

	/* Skip over items until we find the superblock copy */
	for (i = 0; i < nritems; i++) {
		item = &cluster->items[i];

		if (le64_to_cpu(item->bytenr) == BTRFS_SUPER_INFO_OFFSET)
			break;
		bytenr += le32_to_cpu(item->size);
		if (fseek(mdres->in, le32_to_cpu(item->size), SEEK_CUR)) {
			error("seek failed: %s", strerror(errno));
			return -EIO;
		}
	}

	if (!item || le64_to_cpu(item->bytenr) != BTRFS_SUPER_INFO_OFFSET) {
		/*
		 * Don't dereference item here: it is NULL when the cluster
		 * contained no items at all, print the expected offset
		 * instead.
		 */
		error("did not find superblock at %llu",
				(unsigned long long)BTRFS_SUPER_INFO_OFFSET);
		return -EINVAL;
	}

	buffer = malloc(le32_to_cpu(item->size));
	if (!buffer) {
		error("not enough memory to allocate buffer");
		return -ENOMEM;
	}

	ret = fread(buffer, le32_to_cpu(item->size), 1, mdres->in);
	if (ret != 1) {
		error("unable to read buffer: %s", strerror(errno));
		free(buffer);
		return -EIO;
	}

	if (mdres->compress_method == COMPRESS_ZLIB) {
		/*
		 * uncompress() takes uLongf (unsigned long) for the
		 * destination length, so don't use size_t here.
		 */
		unsigned long size = MAX_PENDING_SIZE * 2;
		u8 *tmp;

		tmp = malloc(MAX_PENDING_SIZE * 2);
		if (!tmp) {
			free(buffer);
			return -ENOMEM;
		}
		ret = uncompress(tmp, &size,
				 buffer, le32_to_cpu(item->size));
		if (ret != Z_OK) {
			error("decompression failed with %d", ret);
			free(buffer);
			free(tmp);
			return -EIO;
		}
		free(buffer);
		buffer = tmp;
	}

	pthread_mutex_lock(&mdres->mutex);
	super = (struct btrfs_super_block *)buffer;
	chunk_root_bytenr = btrfs_super_chunk_root(super);
	mdres->nodesize = btrfs_super_nodesize(super);
	memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
	memcpy(mdres->uuid, super->dev_item.uuid,
		       BTRFS_UUID_SIZE);
	mdres->devid = le64_to_cpu(super->dev_item.devid);
	free(buffer);
	pthread_mutex_unlock(&mdres->mutex);

	return search_for_chunk_blocks(mdres, chunk_root_bytenr, 0);
}
2446
2447 static int range_contains_super(u64 physical, u64 bytes)
2448 {
2449         u64 super_bytenr;
2450         int i;
2451
2452         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2453                 super_bytenr = btrfs_sb_offset(i);
2454                 if (super_bytenr >= physical &&
2455                     super_bytenr < physical + bytes)
2456                         return 1;
2457         }
2458
2459         return 0;
2460 }
2461
2462 static void remap_overlapping_chunks(struct mdrestore_struct *mdres)
2463 {
2464         struct fs_chunk *fs_chunk;
2465
2466         while (!list_empty(&mdres->overlapping_chunks)) {
2467                 fs_chunk = list_first_entry(&mdres->overlapping_chunks,
2468                                             struct fs_chunk, list);
2469                 list_del_init(&fs_chunk->list);
2470                 if (range_contains_super(fs_chunk->physical,
2471                                          fs_chunk->bytes)) {
2472                         warning(
2473 "remapping a chunk that had a super mirror inside of it, clearing space cache so we don't end up with corruption");
2474                         mdres->clear_space_cache = 1;
2475                 }
2476                 fs_chunk->physical = mdres->last_physical_offset;
2477                 tree_insert(&mdres->physical_tree, &fs_chunk->p, physical_cmp);
2478                 mdres->last_physical_offset += fs_chunk->bytes;
2479         }
2480 }
2481
2482 static int fixup_devices(struct btrfs_fs_info *fs_info,
2483                          struct mdrestore_struct *mdres, off_t dev_size)
2484 {
2485         struct btrfs_trans_handle *trans;
2486         struct btrfs_dev_item *dev_item;
2487         struct btrfs_path path;
2488         struct extent_buffer *leaf;
2489         struct btrfs_root *root = fs_info->chunk_root;
2490         struct btrfs_key key;
2491         u64 devid, cur_devid;
2492         int ret;
2493
2494         trans = btrfs_start_transaction(fs_info->tree_root, 1);
2495         if (IS_ERR(trans)) {
2496                 error("cannot starting transaction %ld", PTR_ERR(trans));
2497                 return PTR_ERR(trans);
2498         }
2499
2500         dev_item = &fs_info->super_copy->dev_item;
2501
2502         devid = btrfs_stack_device_id(dev_item);
2503
2504         btrfs_set_stack_device_total_bytes(dev_item, dev_size);
2505         btrfs_set_stack_device_bytes_used(dev_item, mdres->alloced_chunks);
2506
2507         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2508         key.type = BTRFS_DEV_ITEM_KEY;
2509         key.offset = 0;
2510
2511         btrfs_init_path(&path);
2512
2513 again:
2514         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
2515         if (ret < 0) {
2516                 error("search failed: %d", ret);
2517                 exit(1);
2518         }
2519
2520         while (1) {
2521                 leaf = path.nodes[0];
2522                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2523                         ret = btrfs_next_leaf(root, &path);
2524                         if (ret < 0) {
2525                                 error("cannot go to next leaf %d", ret);
2526                                 exit(1);
2527                         }
2528                         if (ret > 0) {
2529                                 ret = 0;
2530                                 break;
2531                         }
2532                         leaf = path.nodes[0];
2533                 }
2534
2535                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2536                 if (key.type > BTRFS_DEV_ITEM_KEY)
2537                         break;
2538                 if (key.type != BTRFS_DEV_ITEM_KEY) {
2539                         path.slots[0]++;
2540                         continue;
2541                 }
2542
2543                 dev_item = btrfs_item_ptr(leaf, path.slots[0],
2544                                           struct btrfs_dev_item);
2545                 cur_devid = btrfs_device_id(leaf, dev_item);
2546                 if (devid != cur_devid) {
2547                         ret = btrfs_del_item(trans, root, &path);
2548                         if (ret) {
2549                                 error("cannot delete item: %d", ret);
2550                                 exit(1);
2551                         }
2552                         btrfs_release_path(&path);
2553                         goto again;
2554                 }
2555
2556                 btrfs_set_device_total_bytes(leaf, dev_item, dev_size);
2557                 btrfs_set_device_bytes_used(leaf, dev_item,
2558                                             mdres->alloced_chunks);
2559                 btrfs_mark_buffer_dirty(leaf);
2560                 path.slots[0]++;
2561         }
2562
2563         btrfs_release_path(&path);
2564         ret = btrfs_commit_transaction(trans, fs_info->tree_root);
2565         if (ret) {
2566                 error("unable to commit transaction: %d", ret);
2567                 return ret;
2568         }
2569         return 0;
2570 }
2571
2572 static int restore_metadump(const char *input, FILE *out, int old_restore,
2573                             int num_threads, int fixup_offset,
2574                             const char *target, int multi_devices)
2575 {
2576         struct meta_cluster *cluster = NULL;
2577         struct meta_cluster_header *header;
2578         struct mdrestore_struct mdrestore;
2579         struct btrfs_fs_info *info = NULL;
2580         u64 bytenr = 0;
2581         FILE *in = NULL;
2582         int ret = 0;
2583
2584         if (!strcmp(input, "-")) {
2585                 in = stdin;
2586         } else {
2587                 in = fopen(input, "r");
2588                 if (!in) {
2589                         error("unable to open metadump image: %s",
2590                                         strerror(errno));
2591                         return 1;
2592                 }
2593         }
2594
2595         /* NOTE: open with write mode */
2596         if (fixup_offset) {
2597                 info = open_ctree_fs_info(target, 0, 0, 0,
2598                                           OPEN_CTREE_WRITES |
2599                                           OPEN_CTREE_RESTORE |
2600                                           OPEN_CTREE_PARTIAL);
2601                 if (!info) {
2602                         error("open ctree failed");
2603                         ret = -EIO;
2604                         goto failed_open;
2605                 }
2606         }
2607
2608         cluster = malloc(BLOCK_SIZE);
2609         if (!cluster) {
2610                 error("not enough memory for cluster");
2611                 ret = -ENOMEM;
2612                 goto failed_info;
2613         }
2614
2615         ret = mdrestore_init(&mdrestore, in, out, old_restore, num_threads,
2616                              fixup_offset, info, multi_devices);
2617         if (ret) {
2618                 error("failed to initialize metadata restore state: %d", ret);
2619                 goto failed_cluster;
2620         }
2621
2622         if (!multi_devices && !old_restore) {
2623                 ret = build_chunk_tree(&mdrestore, cluster);
2624                 if (ret)
2625                         goto out;
2626                 if (!list_empty(&mdrestore.overlapping_chunks))
2627                         remap_overlapping_chunks(&mdrestore);
2628         }
2629
2630         if (in != stdin && fseek(in, 0, SEEK_SET)) {
2631                 error("seek failed: %s", strerror(errno));
2632                 goto out;
2633         }
2634
2635         while (!mdrestore.error) {
2636                 ret = fread(cluster, BLOCK_SIZE, 1, in);
2637                 if (!ret)
2638                         break;
2639
2640                 header = &cluster->header;
2641                 if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
2642                     le64_to_cpu(header->bytenr) != bytenr) {
2643                         error("bad header in metadump image");
2644                         ret = -EIO;
2645                         break;
2646                 }
2647                 ret = add_cluster(cluster, &mdrestore, &bytenr);
2648                 if (ret) {
2649                         error("failed to add cluster: %d", ret);
2650                         break;
2651                 }
2652         }
2653         ret = wait_for_worker(&mdrestore);
2654
2655         if (!ret && !multi_devices && !old_restore) {
2656                 struct btrfs_root *root;
2657                 struct stat st;
2658
2659                 root = open_ctree_fd(fileno(out), target, 0,
2660                                           OPEN_CTREE_PARTIAL |
2661                                           OPEN_CTREE_WRITES |
2662                                           OPEN_CTREE_NO_DEVICES);
2663                 if (!root) {
2664                         error("open ctree failed in %s", target);
2665                         ret = -EIO;
2666                         goto out;
2667                 }
2668                 info = root->fs_info;
2669
2670                 if (stat(target, &st)) {
2671                         error("stat %s failed: %s", target, strerror(errno));
2672                         close_ctree(info->chunk_root);
2673                         free(cluster);
2674                         return 1;
2675                 }
2676
2677                 ret = fixup_devices(info, &mdrestore, st.st_size);
2678                 close_ctree(info->chunk_root);
2679                 if (ret)
2680                         goto out;
2681         }
2682 out:
2683         mdrestore_destroy(&mdrestore, num_threads);
2684 failed_cluster:
2685         free(cluster);
2686 failed_info:
2687         if (fixup_offset && info)
2688                 close_ctree(info->chunk_root);
2689 failed_open:
2690         if (in != stdin)
2691                 fclose(in);
2692         return ret;
2693 }
2694
2695 static int update_disk_super_on_device(struct btrfs_fs_info *info,
2696                                        const char *other_dev, u64 cur_devid)
2697 {
2698         struct btrfs_key key;
2699         struct extent_buffer *leaf;
2700         struct btrfs_path path;
2701         struct btrfs_dev_item *dev_item;
2702         struct btrfs_super_block *disk_super;
2703         char dev_uuid[BTRFS_UUID_SIZE];
2704         char fs_uuid[BTRFS_UUID_SIZE];
2705         u64 devid, type, io_align, io_width;
2706         u64 sector_size, total_bytes, bytes_used;
2707         char buf[BTRFS_SUPER_INFO_SIZE];
2708         int fp = -1;
2709         int ret;
2710
2711         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2712         key.type = BTRFS_DEV_ITEM_KEY;
2713         key.offset = cur_devid;
2714
2715         btrfs_init_path(&path);
2716         ret = btrfs_search_slot(NULL, info->chunk_root, &key, &path, 0, 0); 
2717         if (ret) {
2718                 error("search key failed: %d", ret);
2719                 ret = -EIO;
2720                 goto out;
2721         }
2722
2723         leaf = path.nodes[0];
2724         dev_item = btrfs_item_ptr(leaf, path.slots[0],
2725                                   struct btrfs_dev_item);
2726
2727         devid = btrfs_device_id(leaf, dev_item);
2728         if (devid != cur_devid) {
2729                 error("devid mismatch: %llu != %llu",
2730                                 (unsigned long long)devid,
2731                                 (unsigned long long)cur_devid);
2732                 ret = -EIO;
2733                 goto out;
2734         }
2735
2736         type = btrfs_device_type(leaf, dev_item);
2737         io_align = btrfs_device_io_align(leaf, dev_item);
2738         io_width = btrfs_device_io_width(leaf, dev_item);
2739         sector_size = btrfs_device_sector_size(leaf, dev_item);
2740         total_bytes = btrfs_device_total_bytes(leaf, dev_item);
2741         bytes_used = btrfs_device_bytes_used(leaf, dev_item);
2742         read_extent_buffer(leaf, dev_uuid, (unsigned long)btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE);
2743         read_extent_buffer(leaf, fs_uuid, (unsigned long)btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE);
2744
2745         btrfs_release_path(&path);
2746
2747         printf("update disk super on %s devid=%llu\n", other_dev, devid);
2748
2749         /* update other devices' super block */
2750         fp = open(other_dev, O_CREAT | O_RDWR, 0600);
2751         if (fp < 0) {
2752                 error("could not open %s: %s", other_dev, strerror(errno));
2753                 ret = -EIO;
2754                 goto out;
2755         }
2756
2757         memcpy(buf, info->super_copy, BTRFS_SUPER_INFO_SIZE);
2758
2759         disk_super = (struct btrfs_super_block *)buf;
2760         dev_item = &disk_super->dev_item;
2761
2762         btrfs_set_stack_device_type(dev_item, type);
2763         btrfs_set_stack_device_id(dev_item, devid);
2764         btrfs_set_stack_device_total_bytes(dev_item, total_bytes);
2765         btrfs_set_stack_device_bytes_used(dev_item, bytes_used);
2766         btrfs_set_stack_device_io_align(dev_item, io_align);
2767         btrfs_set_stack_device_io_width(dev_item, io_width);
2768         btrfs_set_stack_device_sector_size(dev_item, sector_size);
2769         memcpy(dev_item->uuid, dev_uuid, BTRFS_UUID_SIZE);
2770         memcpy(dev_item->fsid, fs_uuid, BTRFS_UUID_SIZE);
2771         csum_block((u8 *)buf, BTRFS_SUPER_INFO_SIZE);
2772
2773         ret = pwrite64(fp, buf, BTRFS_SUPER_INFO_SIZE, BTRFS_SUPER_INFO_OFFSET);
2774         if (ret != BTRFS_SUPER_INFO_SIZE) {
2775                 if (ret < 0)
2776                         error("cannot write superblock: %s", strerror(ret));
2777                 else
2778                         error("cannot write superblock");
2779                 ret = -EIO;
2780                 goto out;
2781         }
2782
2783         write_backup_supers(fp, (u8 *)buf);
2784
2785 out:
2786         if (fp != -1)
2787                 close(fp);
2788         return ret;
2789 }
2790
/*
 * Print the usage text to stdout and terminate the process with the
 * given exit code.  Output is identical to printing each table entry
 * followed by a newline.
 */
static void print_usage(int ret)
{
	static const char * const usage_lines[] = {
		"usage: btrfs-image [options] source target",
		"\t-r      \trestore metadump image",
		"\t-c value\tcompression level (0 ~ 9)",
		"\t-t value\tnumber of threads (1 ~ 32)",
		"\t-o      \tdon't mess with the chunk tree when restoring",
		"\t-s      \tsanitize file names, use once to just use garbage, use twice if you want crc collisions",
		"\t-w      \twalk all trees instead of using extent tree, do this if your extent tree is broken",
		"\t-m       \trestore for multiple devices",
		"",
		"\tIn the dump mode, source is the btrfs device and target is the output file (use '-' for stdout).",
		"\tIn the restore mode, source is the dumped image and target is the btrfs device/file.",
	};
	size_t i;

	for (i = 0; i < sizeof(usage_lines) / sizeof(usage_lines[0]); i++)
		printf("%s\n", usage_lines[i]);
	exit(ret);
}
2806
2807 int main(int argc, char *argv[])
2808 {
2809         char *source;
2810         char *target;
2811         u64 num_threads = 0;
2812         u64 compress_level = 0;
2813         int create = 1;
2814         int old_restore = 0;
2815         int walk_trees = 0;
2816         int multi_devices = 0;
2817         int ret;
2818         enum sanitize_mode sanitize = SANITIZE_NONE;
2819         int dev_cnt = 0;
2820         int usage_error = 0;
2821         FILE *out;
2822
2823         while (1) {
2824                 static const struct option long_options[] = {
2825                         { "help", no_argument, NULL, GETOPT_VAL_HELP},
2826                         { NULL, 0, NULL, 0 }
2827                 };
2828                 int c = getopt_long(argc, argv, "rc:t:oswm", long_options, NULL);
2829                 if (c < 0)
2830                         break;
2831                 switch (c) {
2832                 case 'r':
2833                         create = 0;
2834                         break;
2835                 case 't':
2836                         num_threads = arg_strtou64(optarg);
2837                         if (num_threads > MAX_WORKER_THREADS) {
2838                                 error("number of threads out of range: %llu > %d",
2839                                         (unsigned long long)num_threads,
2840                                         MAX_WORKER_THREADS);
2841                                 return 1;
2842                         }
2843                         break;
2844                 case 'c':
2845                         compress_level = arg_strtou64(optarg);
2846                         if (compress_level > 9) {
2847                                 error("compression level out of range: %llu",
2848                                         (unsigned long long)compress_level);
2849                                 return 1;
2850                         }
2851                         break;
2852                 case 'o':
2853                         old_restore = 1;
2854                         break;
2855                 case 's':
2856                         if (sanitize == SANITIZE_NONE)
2857                                 sanitize = SANITIZE_NAMES;
2858                         else if (sanitize == SANITIZE_NAMES)
2859                                 sanitize = SANITIZE_COLLISIONS;
2860                         break;
2861                 case 'w':
2862                         walk_trees = 1;
2863                         break;
2864                 case 'm':
2865                         create = 0;
2866                         multi_devices = 1;
2867                         break;
2868                         case GETOPT_VAL_HELP:
2869                 default:
2870                         print_usage(c != GETOPT_VAL_HELP);
2871                 }
2872         }
2873
2874         set_argv0(argv);
2875         if (check_argc_min(argc - optind, 2))
2876                 print_usage(1);
2877
2878         dev_cnt = argc - optind - 1;
2879
2880         if (create) {
2881                 if (old_restore) {
2882                         error(
2883                         "create and restore cannot be used at the same time");
2884                         usage_error++;
2885                 }
2886         } else {
2887                 if (walk_trees || sanitize != SANITIZE_NONE || compress_level) {
2888                         error(
2889                         "useing -w, -s, -c options for restore makes no sense");
2890                         usage_error++;
2891                 }
2892                 if (multi_devices && dev_cnt < 2) {
2893                         error("not enough devices specified for -m option");
2894                         usage_error++;
2895                 }
2896                 if (!multi_devices && dev_cnt != 1) {
2897                         error("accepts only 1 device without -m option");
2898                         usage_error++;
2899                 }
2900         }
2901
2902         if (usage_error)
2903                 print_usage(1);
2904
2905         source = argv[optind];
2906         target = argv[optind + 1];
2907
2908         if (create && !strcmp(target, "-")) {
2909                 out = stdout;
2910         } else {
2911                 out = fopen(target, "w+");
2912                 if (!out) {
2913                         error("unable to create target file %s", target);
2914                         exit(1);
2915                 }
2916         }
2917
2918         if (compress_level > 0 || create == 0) {
2919                 if (num_threads == 0) {
2920                         long tmp = sysconf(_SC_NPROCESSORS_ONLN);
2921
2922                         if (tmp <= 0)
2923                                 tmp = 1;
2924                         num_threads = tmp;
2925                 }
2926         } else {
2927                 num_threads = 0;
2928         }
2929
2930         if (create) {
2931                 ret = check_mounted(source);
2932                 if (ret < 0) {
2933                         warning("unable to check mount status of: %s",
2934                                         strerror(-ret));
2935                 } else if (ret) {
2936                         warning("%s already mounted, results may be inaccurate",
2937                                         source);
2938                 }
2939
2940                 ret = create_metadump(source, out, num_threads,
2941                                       compress_level, sanitize, walk_trees);
2942         } else {
2943                 ret = restore_metadump(source, out, old_restore, num_threads,
2944                                        0, target, multi_devices);
2945         }
2946         if (ret) {
2947                 error("%s failed: %s", (create) ? "create" : "restore",
2948                        strerror(errno));
2949                 goto out;
2950         }
2951
2952          /* extended support for multiple devices */
2953         if (!create && multi_devices) {
2954                 struct btrfs_fs_info *info;
2955                 u64 total_devs;
2956                 int i;
2957
2958                 info = open_ctree_fs_info(target, 0, 0, 0,
2959                                           OPEN_CTREE_PARTIAL |
2960                                           OPEN_CTREE_RESTORE);
2961                 if (!info) {
2962                         error("open ctree failed at %s", target);
2963                         return 1;
2964                 }
2965
2966                 total_devs = btrfs_super_num_devices(info->super_copy);
2967                 if (total_devs != dev_cnt) {
2968                         error("it needs %llu devices but has only %d",
2969                                 total_devs, dev_cnt);
2970                         close_ctree(info->chunk_root);
2971                         goto out;
2972                 }
2973
2974                 /* update super block on other disks */
2975                 for (i = 2; i <= dev_cnt; i++) {
2976                         ret = update_disk_super_on_device(info,
2977                                         argv[optind + i], (u64)i);
2978                         if (ret) {
2979                                 error("update disk superblock failed devid %d: %d",
2980                                         i, ret);
2981                                 close_ctree(info->chunk_root);
2982                                 exit(1);
2983                         }
2984                 }
2985
2986                 close_ctree(info->chunk_root);
2987
2988                 /* fix metadata block to map correct chunk */
2989                 ret = restore_metadump(source, out, 0, num_threads, 1,
2990                                        target, 1);
2991                 if (ret) {
2992                         error("unable to fixup metadump: %d", ret);
2993                         exit(1);
2994                 }
2995         }
2996 out:
2997         if (out == stdout) {
2998                 fflush(out);
2999         } else {
3000                 fclose(out);
3001                 if (ret && create) {
3002                         int unlink_ret;
3003
3004                         unlink_ret = unlink(target);
3005                         if (unlink_ret)
3006                                 error("unlink output file %s failed: %s",
3007                                                 target, strerror(errno));
3008                 }
3009         }
3010
3011         btrfs_close_all_devices();
3012
3013         return !!ret;
3014 }