btrfs-progs: image: pass sanitize mode and name tree separately to sanitize_dir_item
image/main.c
/*
 * Copyright (C) 2008 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <dirent.h>
#include <zlib.h>
#include <getopt.h>

#include "kerncompat.h"
#include "crc32c.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "utils.h"
#include "volumes.h"
#include "extent_io.h"
#include "help.h"
#include "image/metadump.h"
#include "image/sanitize.h"

#define MAX_WORKER_THREADS      (32)

struct async_work {
        struct list_head list;
        struct list_head ordered;
        u64 start;
        u64 size;
        u8 *buffer;
        size_t bufsize;
        int error;
};

struct metadump_struct {
        struct btrfs_root *root;
        FILE *out;

        union {
                struct meta_cluster cluster;
                char meta_cluster_bytes[BLOCK_SIZE];
        };

        pthread_t threads[MAX_WORKER_THREADS];
        size_t num_threads;
        pthread_mutex_t mutex;
        pthread_cond_t cond;
        struct rb_root name_tree;

        struct list_head list;
        struct list_head ordered;
        size_t num_items;
        size_t num_ready;

        u64 pending_start;
        u64 pending_size;

        int compress_level;
        int done;
        int data;
        enum sanitize_mode sanitize_names;

        int error;
};

struct mdrestore_struct {
        FILE *in;
        FILE *out;

        pthread_t threads[MAX_WORKER_THREADS];
        size_t num_threads;
        pthread_mutex_t mutex;
        pthread_cond_t cond;

        struct rb_root chunk_tree;
        struct rb_root physical_tree;
        struct list_head list;
        struct list_head overlapping_chunks;
        size_t num_items;
        u32 nodesize;
        u64 devid;
        u64 alloced_chunks;
        u64 last_physical_offset;
        u8 uuid[BTRFS_UUID_SIZE];
        u8 fsid[BTRFS_FSID_SIZE];

        int compress_method;
        int done;
        int error;
        int old_restore;
        int fixup_offset;
        int multi_devices;
        int clear_space_cache;
        struct btrfs_fs_info *info;
};

static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
                                   u64 search, u64 cluster_bytenr);
static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size);

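/*
 * Checksum a block in place: compute the crc32c of everything past the
 * checksum area and store the result at the start of the buffer.
 */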
static void csum_block(u8 *buf, size_t len)
{
        u8 result[BTRFS_CRC32_SIZE];
        u32 crc = ~(u32)0;
        crc = crc32c(crc, buf + BTRFS_CSUM_SIZE, len - BTRFS_CSUM_SIZE);
        btrfs_csum_final(crc, result);
        memcpy(buf, result, BTRFS_CRC32_SIZE);
}

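/* Return 1 if items with this key type embed file or xattr names. */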
static int has_name(struct btrfs_key *key)
{
        switch (key->type) {
        case BTRFS_DIR_ITEM_KEY:
        case BTRFS_DIR_INDEX_KEY:
        case BTRFS_INODE_REF_KEY:
        case BTRFS_INODE_EXTREF_KEY:
        case BTRFS_XATTR_ITEM_KEY:
                return 1;
        default:
                break;
        }

        return 0;
}

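/*
 * Return a malloc'd buffer of @name_len random printable characters,
 * never containing '/'. The caller must free it.
 */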
static char *generate_garbage(u32 name_len)
{
        char *buf = malloc(name_len);
        int i;

        if (!buf)
                return NULL;

        for (i = 0; i < name_len; i++) {
                char c = rand_range(94) + 33;

                if (c == '/')
                        c++;
                buf[i] = c;
        }

        return buf;
}

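/*
 * Compare two cached names. Only the common prefix is compared, so a
 * search key that is a prefix of an existing entry matches it.
 */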
static int name_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
{
        struct name *entry = rb_entry(a, struct name, n);
        struct name *ins = rb_entry(b, struct name, n);
        u32 len;

        len = min(ins->len, entry->len);
        return memcmp(ins->val, entry->val, len);
}

static int chunk_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
{
        struct fs_chunk *entry = rb_entry(a, struct fs_chunk, l);
        struct fs_chunk *ins = rb_entry(b, struct fs_chunk, l);

        if (fuzz && ins->logical >= entry->logical &&
            ins->logical < entry->logical + entry->bytes)
                return 0;

        if (ins->logical < entry->logical)
                return -1;
        else if (ins->logical > entry->logical)
                return 1;
        return 0;
}

static int physical_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
{
        struct fs_chunk *entry = rb_entry(a, struct fs_chunk, p);
        struct fs_chunk *ins = rb_entry(b, struct fs_chunk, p);

        if (fuzz && ins->physical >= entry->physical &&
            ins->physical < entry->physical + entry->bytes)
                return 0;

        if (fuzz && entry->physical >= ins->physical &&
            entry->physical < ins->physical + ins->bytes)
                return 0;

        if (ins->physical < entry->physical)
                return -1;
        else if (ins->physical > entry->physical)
                return 1;
        return 0;
}

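/*
 * Insert @ins into an rb-tree using the given comparator. Exact
 * duplicates are not expected and trigger a BUG().
 */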
static void tree_insert(struct rb_root *root, struct rb_node *ins,
                        int (*cmp)(struct rb_node *a, struct rb_node *b,
                                   int fuzz))
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        int dir;

        while (*p) {
                parent = *p;

                dir = cmp(*p, ins, 1);
                if (dir < 0)
                        p = &(*p)->rb_left;
                else if (dir > 0)
                        p = &(*p)->rb_right;
                else
                        BUG();
        }

        rb_link_node(ins, parent, p);
        rb_insert_color(ins, root);
}

static struct rb_node *tree_search(struct rb_root *root,
                                   struct rb_node *search,
                                   int (*cmp)(struct rb_node *a,
                                              struct rb_node *b, int fuzz),
                                   int fuzz)
{
        struct rb_node *n = root->rb_node;
        int dir;

        while (n) {
                dir = cmp(n, search, fuzz);
                if (dir < 0)
                        n = n->rb_left;
                else if (dir > 0)
                        n = n->rb_right;
                else
                        return n;
        }

        return NULL;
}

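/*
 * Map a logical address to its physical location on the restore target
 * using the chunk tree built from the image. *size is clamped to the
 * end of the containing chunk and, for DUP chunks, the second copy is
 * reported via *physical_dup.
 */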
static u64 logical_to_physical(struct mdrestore_struct *mdres, u64 logical,
                               u64 *size, u64 *physical_dup)
{
        struct fs_chunk *fs_chunk;
        struct rb_node *entry;
        struct fs_chunk search;
        u64 offset;

        if (logical == BTRFS_SUPER_INFO_OFFSET)
                return logical;

        search.logical = logical;
        entry = tree_search(&mdres->chunk_tree, &search.l, chunk_cmp, 1);
        if (!entry) {
                if (mdres->in != stdin)
                        warning("cannot find a chunk, using logical");
                return logical;
        }
        fs_chunk = rb_entry(entry, struct fs_chunk, l);
        if (fs_chunk->logical > logical || fs_chunk->logical + fs_chunk->bytes < logical)
                BUG();
        offset = search.logical - fs_chunk->logical;

        if (physical_dup) {
                /* physical_dup is non-zero only for DUP chunks */
                if (fs_chunk->physical_dup)
                        *physical_dup = fs_chunk->physical_dup + offset;
                else
                        *physical_dup = 0;
        }

        *size = min(*size, fs_chunk->bytes + fs_chunk->logical - logical);
        return fs_chunk->physical + offset;
}

/*
 * Reverse CRC-32C table
 */
static const u32 crc32c_rev_table[256] = {
        0x00000000L,0x05EC76F1L,0x0BD8EDE2L,0x0E349B13L,
        0x17B1DBC4L,0x125DAD35L,0x1C693626L,0x198540D7L,
        0x2F63B788L,0x2A8FC179L,0x24BB5A6AL,0x21572C9BL,
        0x38D26C4CL,0x3D3E1ABDL,0x330A81AEL,0x36E6F75FL,
        0x5EC76F10L,0x5B2B19E1L,0x551F82F2L,0x50F3F403L,
        0x4976B4D4L,0x4C9AC225L,0x42AE5936L,0x47422FC7L,
        0x71A4D898L,0x7448AE69L,0x7A7C357AL,0x7F90438BL,
        0x6615035CL,0x63F975ADL,0x6DCDEEBEL,0x6821984FL,
        0xBD8EDE20L,0xB862A8D1L,0xB65633C2L,0xB3BA4533L,
        0xAA3F05E4L,0xAFD37315L,0xA1E7E806L,0xA40B9EF7L,
        0x92ED69A8L,0x97011F59L,0x9935844AL,0x9CD9F2BBL,
        0x855CB26CL,0x80B0C49DL,0x8E845F8EL,0x8B68297FL,
        0xE349B130L,0xE6A5C7C1L,0xE8915CD2L,0xED7D2A23L,
        0xF4F86AF4L,0xF1141C05L,0xFF208716L,0xFACCF1E7L,
        0xCC2A06B8L,0xC9C67049L,0xC7F2EB5AL,0xC21E9DABL,
        0xDB9BDD7CL,0xDE77AB8DL,0xD043309EL,0xD5AF466FL,
        0x7EF1CAB1L,0x7B1DBC40L,0x75292753L,0x70C551A2L,
        0x69401175L,0x6CAC6784L,0x6298FC97L,0x67748A66L,
        0x51927D39L,0x547E0BC8L,0x5A4A90DBL,0x5FA6E62AL,
        0x4623A6FDL,0x43CFD00CL,0x4DFB4B1FL,0x48173DEEL,
        0x2036A5A1L,0x25DAD350L,0x2BEE4843L,0x2E023EB2L,
        0x37877E65L,0x326B0894L,0x3C5F9387L,0x39B3E576L,
        0x0F551229L,0x0AB964D8L,0x048DFFCBL,0x0161893AL,
        0x18E4C9EDL,0x1D08BF1CL,0x133C240FL,0x16D052FEL,
        0xC37F1491L,0xC6936260L,0xC8A7F973L,0xCD4B8F82L,
        0xD4CECF55L,0xD122B9A4L,0xDF1622B7L,0xDAFA5446L,
        0xEC1CA319L,0xE9F0D5E8L,0xE7C44EFBL,0xE228380AL,
        0xFBAD78DDL,0xFE410E2CL,0xF075953FL,0xF599E3CEL,
        0x9DB87B81L,0x98540D70L,0x96609663L,0x938CE092L,
        0x8A09A045L,0x8FE5D6B4L,0x81D14DA7L,0x843D3B56L,
        0xB2DBCC09L,0xB737BAF8L,0xB90321EBL,0xBCEF571AL,
        0xA56A17CDL,0xA086613CL,0xAEB2FA2FL,0xAB5E8CDEL,
        0xFDE39562L,0xF80FE393L,0xF63B7880L,0xF3D70E71L,
        0xEA524EA6L,0xEFBE3857L,0xE18AA344L,0xE466D5B5L,
        0xD28022EAL,0xD76C541BL,0xD958CF08L,0xDCB4B9F9L,
        0xC531F92EL,0xC0DD8FDFL,0xCEE914CCL,0xCB05623DL,
        0xA324FA72L,0xA6C88C83L,0xA8FC1790L,0xAD106161L,
        0xB49521B6L,0xB1795747L,0xBF4DCC54L,0xBAA1BAA5L,
        0x8C474DFAL,0x89AB3B0BL,0x879FA018L,0x8273D6E9L,
        0x9BF6963EL,0x9E1AE0CFL,0x902E7BDCL,0x95C20D2DL,
        0x406D4B42L,0x45813DB3L,0x4BB5A6A0L,0x4E59D051L,
        0x57DC9086L,0x5230E677L,0x5C047D64L,0x59E80B95L,
        0x6F0EFCCAL,0x6AE28A3BL,0x64D61128L,0x613A67D9L,
        0x78BF270EL,0x7D5351FFL,0x7367CAECL,0x768BBC1DL,
        0x1EAA2452L,0x1B4652A3L,0x1572C9B0L,0x109EBF41L,
        0x091BFF96L,0x0CF78967L,0x02C31274L,0x072F6485L,
        0x31C993DAL,0x3425E52BL,0x3A117E38L,0x3FFD08C9L,
        0x2678481EL,0x23943EEFL,0x2DA0A5FCL,0x284CD30DL,
        0x83125FD3L,0x86FE2922L,0x88CAB231L,0x8D26C4C0L,
        0x94A38417L,0x914FF2E6L,0x9F7B69F5L,0x9A971F04L,
        0xAC71E85BL,0xA99D9EAAL,0xA7A905B9L,0xA2457348L,
        0xBBC0339FL,0xBE2C456EL,0xB018DE7DL,0xB5F4A88CL,
        0xDDD530C3L,0xD8394632L,0xD60DDD21L,0xD3E1ABD0L,
        0xCA64EB07L,0xCF889DF6L,0xC1BC06E5L,0xC4507014L,
        0xF2B6874BL,0xF75AF1BAL,0xF96E6AA9L,0xFC821C58L,
        0xE5075C8FL,0xE0EB2A7EL,0xEEDFB16DL,0xEB33C79CL,
        0x3E9C81F3L,0x3B70F702L,0x35446C11L,0x30A81AE0L,
        0x292D5A37L,0x2CC12CC6L,0x22F5B7D5L,0x2719C124L,
        0x11FF367BL,0x1413408AL,0x1A27DB99L,0x1FCBAD68L,
        0x064EEDBFL,0x03A29B4EL,0x0D96005DL,0x087A76ACL,
        0x605BEEE3L,0x65B79812L,0x6B830301L,0x6E6F75F0L,
        0x77EA3527L,0x720643D6L,0x7C32D8C5L,0x79DEAE34L,
        0x4F38596BL,0x4AD42F9AL,0x44E0B489L,0x410CC278L,
        0x588982AFL,0x5D65F45EL,0x53516F4DL,0x56BD19BCL
};

/*
 * Calculate a 4-byte suffix to match desired CRC32C
 *
 * @current_crc: CRC32C checksum of all bytes before the suffix
 * @desired_crc: the checksum that we want to get after adding the suffix
 *
 * Outputs: @suffix: pointer to where the suffix will be written (4 bytes)
 */
static void find_collision_calc_suffix(unsigned long current_crc,
                                       unsigned long desired_crc,
                                       char *suffix)
{
        int i;

        for (i = 3; i >= 0; i--) {
                desired_crc = (desired_crc << 8)
                            ^ crc32c_rev_table[desired_crc >> 24 & 0xFF]
                            ^ ((current_crc >> i * 8) & 0xFF);
        }
        for (i = 0; i < 4; i++)
                suffix[i] = (desired_crc >> i * 8) & 0xFF;
}

/*
 * Check if suffix is valid according to our file name conventions
 */
static int find_collision_is_suffix_valid(const char *suffix)
{
        int i;
        char c;

        for (i = 0; i < 4; i++) {
                c = suffix[i];
                if (c < ' ' || c > 126 || c == '/')
                        return 0;
        }
        return 1;
}

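/*
 * Search for a same-length string with the same crc32c as val->val:
 * enumerate printable prefixes in val->sub and compute the matching
 * 4-byte suffix for each. Returns 1 if a valid collision was found.
 */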
static int find_collision_reverse_crc32c(struct name *val, u32 name_len)
{
        unsigned long checksum;
        unsigned long current_checksum;
        int found = 0;
        int i;

        /* There are no same-length collisions of 4 or fewer bytes */
        if (name_len <= 4)
                return 0;
        checksum = crc32c(~1, val->val, name_len);
        name_len -= 4;
        memset(val->sub, ' ', name_len);
        i = 0;
        while (1) {
                current_checksum = crc32c(~1, val->sub, name_len);
                find_collision_calc_suffix(current_checksum,
                                           checksum,
                                           val->sub + name_len);
                if (find_collision_is_suffix_valid(val->sub + name_len) &&
                    memcmp(val->sub, val->val, val->len)) {
                        found = 1;
                        break;
                }

                if (val->sub[i] == 126) {
                        do {
                                i++;
                                if (i >= name_len)
                                        break;
                        } while (val->sub[i] == 126);

                        if (i >= name_len)
                                break;
                        val->sub[i]++;
                        if (val->sub[i] == '/')
                                val->sub[i]++;
                        memset(val->sub, ' ', i);
                        i = 0;
                        continue;
                } else {
                        val->sub[i]++;
                        if (val->sub[i] == '/')
                                val->sub[i]++;
                }
        }
        return found;
}

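/*
 * Return a sanitized replacement with the same length and crc32c as
 * @name, caching results in @name_tree so repeated names map to the
 * same substitute. Takes ownership of @name and falls back to random
 * garbage when no valid collision exists.
 */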
static char *find_collision(struct rb_root *name_tree, char *name,
                            u32 name_len)
{
        struct name *val;
        struct rb_node *entry;
        struct name tmp;
        int found;
        int i;

        tmp.val = name;
        tmp.len = name_len;
        entry = tree_search(name_tree, &tmp.n, name_cmp, 0);
        if (entry) {
                val = rb_entry(entry, struct name, n);
                free(name);
                return val->sub;
        }

        val = malloc(sizeof(struct name));
        if (!val) {
                error("cannot sanitize name, not enough memory");
                free(name);
                return NULL;
        }

        memset(val, 0, sizeof(*val));

        val->val = name;
        val->len = name_len;
        val->sub = malloc(name_len);
        if (!val->sub) {
                error("cannot sanitize name, not enough memory");
                free(val);
                free(name);
                return NULL;
        }

        found = find_collision_reverse_crc32c(val, name_len);

        if (!found) {
                warning(
"cannot find a hash collision for '%.*s', generating garbage, it won't match indexes",
                        val->len, val->val);
                for (i = 0; i < name_len; i++) {
                        char c = rand_range(94) + 33;

                        if (c == '/')
                                c++;
                        val->sub[i] = c;
                }
        }

        tree_insert(name_tree, &val->n, name_cmp);
        return val->sub;
}

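/*
 * Overwrite every name in a dir item (one item may hold several
 * entries) with garbage, or with a crc32c collision from @name_tree
 * when in SANITIZE_COLLISIONS mode.
 */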
static void sanitize_dir_item(enum sanitize_mode sanitize,
                struct rb_root *name_tree, struct extent_buffer *eb, int slot)
{
        struct btrfs_dir_item *dir_item;
        char *buf;
        char *garbage;
        unsigned long name_ptr;
        u32 total_len;
        u32 cur = 0;
        u32 this_len;
        u32 name_len;
        int free_garbage = (sanitize == SANITIZE_NAMES);

        dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
        total_len = btrfs_item_size_nr(eb, slot);
        while (cur < total_len) {
                this_len = sizeof(*dir_item) +
                        btrfs_dir_name_len(eb, dir_item) +
                        btrfs_dir_data_len(eb, dir_item);
                name_ptr = (unsigned long)(dir_item + 1);
                name_len = btrfs_dir_name_len(eb, dir_item);

                if (sanitize == SANITIZE_COLLISIONS) {
                        buf = malloc(name_len);
                        if (!buf) {
                                error("cannot sanitize name, not enough memory");
                                return;
                        }
                        read_extent_buffer(eb, buf, name_ptr, name_len);
                        garbage = find_collision(name_tree, buf, name_len);
                } else {
                        garbage = generate_garbage(name_len);
                }
                if (!garbage) {
                        error("cannot sanitize name, not enough memory");
                        return;
                }
                write_extent_buffer(eb, garbage, name_ptr, name_len);
                cur += this_len;
                dir_item = (struct btrfs_dir_item *)((char *)dir_item +
                                                     this_len);
                if (free_garbage)
                        free(garbage);
        }
}

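/*
 * Same as sanitize_dir_item() but for inode refs; @ext selects the
 * extended ref (INODE_EXTREF) item layout.
 */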
static void sanitize_inode_ref(enum sanitize_mode sanitize,
                struct rb_root *name_tree, struct extent_buffer *eb, int slot,
                int ext)
{
        struct btrfs_inode_extref *extref;
        struct btrfs_inode_ref *ref;
        char *garbage, *buf;
        unsigned long ptr;
        unsigned long name_ptr;
        u32 item_size;
        u32 cur_offset = 0;
        int len;
        int free_garbage = (sanitize == SANITIZE_NAMES);

        item_size = btrfs_item_size_nr(eb, slot);
        ptr = btrfs_item_ptr_offset(eb, slot);
        while (cur_offset < item_size) {
                if (ext) {
                        extref = (struct btrfs_inode_extref *)(ptr +
                                                               cur_offset);
                        name_ptr = (unsigned long)(&extref->name);
                        len = btrfs_inode_extref_name_len(eb, extref);
                        cur_offset += sizeof(*extref);
                } else {
                        ref = (struct btrfs_inode_ref *)(ptr + cur_offset);
                        len = btrfs_inode_ref_name_len(eb, ref);
                        name_ptr = (unsigned long)(ref + 1);
                        cur_offset += sizeof(*ref);
                }
                cur_offset += len;

                if (sanitize == SANITIZE_COLLISIONS) {
                        buf = malloc(len);
                        if (!buf) {
                                error("cannot sanitize name, not enough memory");
                                return;
                        }
                        read_extent_buffer(eb, buf, name_ptr, len);
                        garbage = find_collision(name_tree, buf, len);
                } else {
                        garbage = generate_garbage(len);
                }

                if (!garbage) {
                        error("cannot sanitize name, not enough memory");
                        return;
                }
                write_extent_buffer(eb, garbage, name_ptr, len);
                if (free_garbage)
                        free(garbage);
        }
}

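/* Zero out the data (value) portion of an xattr item. */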
static void sanitize_xattr(struct extent_buffer *eb, int slot)
{
        struct btrfs_dir_item *dir_item;
        unsigned long data_ptr;
        u32 data_len;

        dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
        data_len = btrfs_dir_data_len(eb, dir_item);

        data_ptr = (unsigned long)((char *)(dir_item + 1) +
                                   btrfs_dir_name_len(eb, dir_item));
        memset_extent_buffer(eb, 0, data_ptr, data_len);
}

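/*
 * Copy the block into a dummy extent buffer, sanitize the names in
 * item @slot according to its key type, and write the result to @dst.
 */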
static void sanitize_name(struct metadump_struct *md, u8 *dst,
                          struct extent_buffer *src, struct btrfs_key *key,
                          int slot)
{
        struct extent_buffer *eb;

        eb = alloc_dummy_eb(src->start, src->len);
        if (!eb) {
                error("cannot sanitize name, not enough memory");
                return;
        }

        memcpy(eb->data, src->data, src->len);

        switch (key->type) {
        case BTRFS_DIR_ITEM_KEY:
        case BTRFS_DIR_INDEX_KEY:
                sanitize_dir_item(md->sanitize_names, &md->name_tree, eb, slot);
                break;
        case BTRFS_INODE_REF_KEY:
                sanitize_inode_ref(md->sanitize_names, &md->name_tree, eb, slot,
                                0);
                break;
        case BTRFS_INODE_EXTREF_KEY:
                sanitize_inode_ref(md->sanitize_names, &md->name_tree, eb, slot,
                                1);
                break;
        case BTRFS_XATTR_ITEM_KEY:
                sanitize_xattr(eb, slot);
                break;
        default:
                break;
        }

        memcpy(dst, eb->data, eb->len);
        free(eb);
}

/*
 * zero inline extents and csum items
 */
static void zero_items(struct metadump_struct *md, u8 *dst,
                       struct extent_buffer *src)
{
        struct btrfs_file_extent_item *fi;
        struct btrfs_item *item;
        struct btrfs_key key;
        u32 nritems = btrfs_header_nritems(src);
        size_t size;
        unsigned long ptr;
        int i, extent_type;

        for (i = 0; i < nritems; i++) {
                item = btrfs_item_nr(i);
                btrfs_item_key_to_cpu(src, &key, i);
                if (key.type == BTRFS_CSUM_ITEM_KEY) {
                        size = btrfs_item_size_nr(src, i);
                        memset(dst + btrfs_leaf_data(src) +
                               btrfs_item_offset_nr(src, i), 0, size);
                        continue;
                }

                if (md->sanitize_names && has_name(&key)) {
                        sanitize_name(md, dst, src, &key, i);
                        continue;
                }

                if (key.type != BTRFS_EXTENT_DATA_KEY)
                        continue;

                fi = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
                extent_type = btrfs_file_extent_type(src, fi);
                if (extent_type != BTRFS_FILE_EXTENT_INLINE)
                        continue;

                ptr = btrfs_file_extent_inline_start(fi);
                size = btrfs_file_extent_inline_item_len(src, item);
                memset(dst + ptr, 0, size);
        }
}

/*
 * copy buffer and zero useless data in the buffer
 */
static void copy_buffer(struct metadump_struct *md, u8 *dst,
                        struct extent_buffer *src)
{
        int level;
        size_t size;
        u32 nritems;

        memcpy(dst, src->data, src->len);
        if (src->start == BTRFS_SUPER_INFO_OFFSET)
                return;

        level = btrfs_header_level(src);
        nritems = btrfs_header_nritems(src);

        if (nritems == 0) {
                size = sizeof(struct btrfs_header);
                memset(dst + size, 0, src->len - size);
        } else if (level == 0) {
                size = btrfs_leaf_data(src) +
                        btrfs_item_offset_nr(src, nritems - 1) -
                        btrfs_item_nr_offset(nritems);
                memset(dst + btrfs_item_nr_offset(nritems), 0, size);
                zero_items(md, dst, src);
        } else {
                size = offsetof(struct btrfs_node, ptrs) +
                        sizeof(struct btrfs_key_ptr) * nritems;
                memset(dst + size, 0, src->len - size);
        }
        csum_block(dst, src->len);
}

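/*
 * Worker thread: pull pending buffers off md->list and, when a
 * compression level is set, replace them with their zlib-compressed
 * form.
 */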
static void *dump_worker(void *data)
{
        struct metadump_struct *md = (struct metadump_struct *)data;
        struct async_work *async;
        int ret;

        while (1) {
                pthread_mutex_lock(&md->mutex);
                while (list_empty(&md->list)) {
                        if (md->done) {
                                pthread_mutex_unlock(&md->mutex);
                                goto out;
                        }
                        pthread_cond_wait(&md->cond, &md->mutex);
                }
                async = list_entry(md->list.next, struct async_work, list);
                list_del_init(&async->list);
                pthread_mutex_unlock(&md->mutex);

                if (md->compress_level > 0) {
                        u8 *orig = async->buffer;

                        async->bufsize = compressBound(async->size);
                        async->buffer = malloc(async->bufsize);
                        if (!async->buffer) {
                                error("not enough memory for async buffer");
                                pthread_mutex_lock(&md->mutex);
                                if (!md->error)
                                        md->error = -ENOMEM;
                                pthread_mutex_unlock(&md->mutex);
                                pthread_exit(NULL);
                        }

                        ret = compress2(async->buffer,
                                         (unsigned long *)&async->bufsize,
                                         orig, async->size, md->compress_level);

                        if (ret != Z_OK)
                                async->error = 1;

                        free(orig);
                }

                pthread_mutex_lock(&md->mutex);
                md->num_ready++;
                pthread_mutex_unlock(&md->mutex);
        }
out:
        pthread_exit(NULL);
}

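/* Reset the in-memory cluster header for a new cluster starting at @start. */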
static void meta_cluster_init(struct metadump_struct *md, u64 start)
{
        struct meta_cluster_header *header;

        md->num_items = 0;
        md->num_ready = 0;
        header = &md->cluster.header;
        header->magic = cpu_to_le64(HEADER_MAGIC);
        header->bytenr = cpu_to_le64(start);
        header->nritems = cpu_to_le32(0);
        header->compress = md->compress_level > 0 ?
                           COMPRESS_ZLIB : COMPRESS_NONE;
}

static void metadump_destroy(struct metadump_struct *md, int num_threads)
{
        int i;
        struct rb_node *n;

        pthread_mutex_lock(&md->mutex);
        md->done = 1;
        pthread_cond_broadcast(&md->cond);
        pthread_mutex_unlock(&md->mutex);

        for (i = 0; i < num_threads; i++)
                pthread_join(md->threads[i], NULL);

        pthread_cond_destroy(&md->cond);
        pthread_mutex_destroy(&md->mutex);

        while ((n = rb_first(&md->name_tree))) {
                struct name *name;

                name = rb_entry(n, struct name, n);
                rb_erase(n, &md->name_tree);
                free(name->val);
                free(name->sub);
                free(name);
        }
}

static int metadump_init(struct metadump_struct *md, struct btrfs_root *root,
                         FILE *out, int num_threads, int compress_level,
                         enum sanitize_mode sanitize_names)
{
        int i, ret = 0;

        memset(md, 0, sizeof(*md));
        INIT_LIST_HEAD(&md->list);
        INIT_LIST_HEAD(&md->ordered);
        md->root = root;
        md->out = out;
        md->pending_start = (u64)-1;
        md->compress_level = compress_level;
        md->sanitize_names = sanitize_names;
        if (sanitize_names == SANITIZE_COLLISIONS)
                crc32c_optimization_init();

        md->name_tree.rb_node = NULL;
        md->num_threads = num_threads;
        pthread_cond_init(&md->cond, NULL);
        pthread_mutex_init(&md->mutex, NULL);
        meta_cluster_init(md, 0);

        if (!num_threads)
                return 0;

        for (i = 0; i < num_threads; i++) {
                ret = pthread_create(md->threads + i, NULL, dump_worker, md);
                if (ret)
                        break;
        }

        if (ret)
                metadump_destroy(md, i + 1);

        return ret;
}

static int write_zero(FILE *out, size_t size)
{
        static char zero[BLOCK_SIZE];
        return fwrite(zero, size, 1, out);
}

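/*
 * Emit the current cluster: wait until the workers have compressed all
 * queued buffers, write the index block followed by the buffers in
 * order, and pad the output up to BLOCK_SIZE. The next cluster offset
 * is returned in *next. Called with md->mutex held.
 */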
static int write_buffers(struct metadump_struct *md, u64 *next)
{
        struct meta_cluster_header *header = &md->cluster.header;
        struct meta_cluster_item *item;
        struct async_work *async;
        u64 bytenr = 0;
        u32 nritems = 0;
        int ret;
        int err = 0;

        if (list_empty(&md->ordered))
                goto out;

        /* wait until all buffers are compressed */
        while (!err && md->num_items > md->num_ready) {
                struct timespec ts = {
                        .tv_sec = 0,
                        .tv_nsec = 10000000,
                };
                pthread_mutex_unlock(&md->mutex);
                nanosleep(&ts, NULL);
                pthread_mutex_lock(&md->mutex);
                err = md->error;
        }

        if (err) {
                error("one of the threads failed: %s", strerror(-err));
                goto out;
        }

        /* setup and write index block */
        list_for_each_entry(async, &md->ordered, ordered) {
                item = &md->cluster.items[nritems];
                item->bytenr = cpu_to_le64(async->start);
                item->size = cpu_to_le32(async->bufsize);
                nritems++;
        }
        header->nritems = cpu_to_le32(nritems);

        ret = fwrite(&md->cluster, BLOCK_SIZE, 1, md->out);
        if (ret != 1) {
                error("unable to write out cluster: %s", strerror(errno));
                return -errno;
        }

        /* write buffers */
        bytenr += le64_to_cpu(header->bytenr) + BLOCK_SIZE;
        while (!list_empty(&md->ordered)) {
                async = list_entry(md->ordered.next, struct async_work,
                                   ordered);
                list_del_init(&async->ordered);

                bytenr += async->bufsize;
                if (!err)
                        ret = fwrite(async->buffer, async->bufsize, 1,
                                     md->out);
                if (ret != 1) {
                        error("unable to write out cluster: %s",
                                strerror(errno));
                        err = -errno;
                        ret = 0;
                }

                free(async->buffer);
                free(async);
        }

        /* zero unused space in the last block */
        if (!err && bytenr & BLOCK_MASK) {
                size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);

                bytenr += size;
                ret = write_zero(md->out, size);
                if (ret != 1) {
                        error("unable to zero out buffer: %s",
                                strerror(errno));
                        err = -errno;
                }
        }
out:
        *next = bytenr;
        return err;
}

static int read_data_extent(struct metadump_struct *md,
                            struct async_work *async)
{
        struct btrfs_root *root = md->root;
        struct btrfs_fs_info *fs_info = root->fs_info;
        u64 bytes_left = async->size;
        u64 logical = async->start;
        u64 offset = 0;
        u64 read_len;
        int num_copies;
        int cur_mirror;
        int ret;

        num_copies = btrfs_num_copies(root->fs_info, logical, bytes_left);

        /* Try our best to read data, just like read_tree_block() */
        for (cur_mirror = 0; cur_mirror < num_copies; cur_mirror++) {
                while (bytes_left) {
                        read_len = bytes_left;
                        ret = read_extent_data(fs_info,
                                        (char *)(async->buffer + offset),
                                        logical, &read_len, cur_mirror);
                        if (ret < 0)
                                break;
                        offset += read_len;
                        logical += read_len;
                        bytes_left -= read_len;
                }
        }
        if (bytes_left)
                return -EIO;
        return 0;
}

static int get_dev_fd(struct btrfs_root *root)
{
        struct btrfs_device *dev;

        dev = list_first_entry(&root->fs_info->fs_devices->devices,
                               struct btrfs_device, dev_list);
        return dev->fd;
}

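/*
 * Turn the pending byte range into an async_work item, filling its
 * buffer from data extents or metadata blocks as appropriate, queue it
 * for the workers, and write out the cluster once it holds
 * ITEMS_PER_CLUSTER items (or @done is set).
 */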
static int flush_pending(struct metadump_struct *md, int done)
{
        struct async_work *async = NULL;
        struct extent_buffer *eb;
        u64 start = 0;
        u64 size;
        size_t offset;
        int ret = 0;

        if (md->pending_size) {
                async = calloc(1, sizeof(*async));
                if (!async)
                        return -ENOMEM;

                async->start = md->pending_start;
                async->size = md->pending_size;
                async->bufsize = async->size;
                async->buffer = malloc(async->bufsize);
                if (!async->buffer) {
                        free(async);
                        return -ENOMEM;
                }
                offset = 0;
                start = async->start;
                size = async->size;

                if (md->data) {
                        ret = read_data_extent(md, async);
                        if (ret) {
                                free(async->buffer);
                                free(async);
                                return ret;
                        }
                }

                /*
                 * Balance can make the mapping not cover the super block, so
                 * just copy directly from one of the devices.
                 */
                if (start == BTRFS_SUPER_INFO_OFFSET) {
                        int fd = get_dev_fd(md->root);

                        ret = pread64(fd, async->buffer, size, start);
                        if (ret < size) {
                                free(async->buffer);
                                free(async);
                                error("unable to read superblock at %llu: %s",
                                                (unsigned long long)start,
                                                strerror(errno));
                                return -errno;
                        }
                        size = 0;
                        ret = 0;
                }

                while (!md->data && size > 0) {
                        u64 this_read = min((u64)md->root->fs_info->nodesize,
                                        size);

                        eb = read_tree_block(md->root->fs_info, start, 0);
                        if (!extent_buffer_uptodate(eb)) {
                                free(async->buffer);
                                free(async);
                                error("unable to read metadata block %llu",
                                        (unsigned long long)start);
                                return -EIO;
                        }
                        copy_buffer(md, async->buffer + offset, eb);
                        free_extent_buffer(eb);
                        start += this_read;
                        offset += this_read;
                        size -= this_read;
                }

                md->pending_start = (u64)-1;
                md->pending_size = 0;
        } else if (!done) {
                return 0;
        }

        pthread_mutex_lock(&md->mutex);
        if (async) {
                list_add_tail(&async->ordered, &md->ordered);
                md->num_items++;
                if (md->compress_level > 0) {
                        list_add_tail(&async->list, &md->list);
                        pthread_cond_signal(&md->cond);
                } else {
                        md->num_ready++;
                }
        }
        if (md->num_items >= ITEMS_PER_CLUSTER || done) {
                ret = write_buffers(md, &start);
                if (ret)
                        error("unable to write buffers: %s", strerror(-ret));
                else
                        meta_cluster_init(md, start);
        }
        pthread_mutex_unlock(&md->mutex);
        return ret;
}

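/*
 * Queue a byte range for dumping. The pending range is flushed first
 * if the new range is not contiguous with it, would exceed
 * MAX_PENDING_SIZE, or switches between data and metadata.
 */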
static int add_extent(u64 start, u64 size, struct metadump_struct *md,
                      int data)
{
        int ret;

        if (md->data != data ||
            md->pending_size + size > MAX_PENDING_SIZE ||
            md->pending_start + md->pending_size != start) {
                ret = flush_pending(md, 0);
                if (ret)
                        return ret;
                md->pending_start = start;
        }
        readahead_tree_block(md->root->fs_info, start, 0);
        md->pending_size += size;
        md->data = data;
        return 0;
}

#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
static int is_tree_block(struct btrfs_root *extent_root,
                         struct btrfs_path *path, u64 bytenr)
{
        struct extent_buffer *leaf;
        struct btrfs_key key;
        u64 ref_objectid;
        int ret;

        leaf = path->nodes[0];
        while (1) {
                struct btrfs_extent_ref_v0 *ref_item;

                path->slots[0]++;
                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(extent_root, path);
                        if (ret < 0)
                                return ret;
                        if (ret > 0)
                                break;
                        leaf = path->nodes[0];
                }
                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                if (key.objectid != bytenr)
                        break;
                if (key.type != BTRFS_EXTENT_REF_V0_KEY)
                        continue;
                ref_item = btrfs_item_ptr(leaf, path->slots[0],
                                          struct btrfs_extent_ref_v0);
                ref_objectid = btrfs_ref_objectid_v0(leaf, ref_item);
                if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID)
                        return 1;
                break;
        }
        return 0;
}
#endif

static int copy_tree_blocks(struct btrfs_root *root, struct extent_buffer *eb,
                            struct metadump_struct *metadump, int root_tree)
{
        struct extent_buffer *tmp;
        struct btrfs_root_item *ri;
        struct btrfs_key key;
        struct btrfs_fs_info *fs_info = root->fs_info;
        u64 bytenr;
        int level;
        int nritems = 0;
        int i = 0;
        int ret;

        ret = add_extent(btrfs_header_bytenr(eb), fs_info->nodesize,
                         metadump, 0);
        if (ret) {
                error("unable to add metadata block %llu: %d",
                                btrfs_header_bytenr(eb), ret);
                return ret;
        }

        if (btrfs_header_level(eb) == 0 && !root_tree)
                return 0;

        level = btrfs_header_level(eb);
        nritems = btrfs_header_nritems(eb);
        for (i = 0; i < nritems; i++) {
                if (level == 0) {
                        btrfs_item_key_to_cpu(eb, &key, i);
                        if (key.type != BTRFS_ROOT_ITEM_KEY)
                                continue;
                        ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
                        bytenr = btrfs_disk_root_bytenr(eb, ri);
                        tmp = read_tree_block(fs_info, bytenr, 0);
                        if (!extent_buffer_uptodate(tmp)) {
                                error("unable to read log root block");
                                return -EIO;
                        }
                        ret = copy_tree_blocks(root, tmp, metadump, 0);
                        free_extent_buffer(tmp);
                        if (ret)
                                return ret;
                } else {
                        bytenr = btrfs_node_blockptr(eb, i);
                        tmp = read_tree_block(fs_info, bytenr, 0);
                        if (!extent_buffer_uptodate(tmp)) {
                                error("unable to read log root block");
                                return -EIO;
                        }
                        ret = copy_tree_blocks(root, tmp, metadump, root_tree);
                        free_extent_buffer(tmp);
                        if (ret)
                                return ret;
                }
        }

        return 0;
}

static int copy_log_trees(struct btrfs_root *root,
                          struct metadump_struct *metadump)
{
        u64 blocknr = btrfs_super_log_root(root->fs_info->super_copy);

        if (blocknr == 0)
                return 0;

        if (!root->fs_info->log_root_tree ||
            !root->fs_info->log_root_tree->node) {
                error("unable to copy tree log, it has not been set up");
                return -EIO;
        }

        return copy_tree_blocks(root, root->fs_info->log_root_tree->node,
                                metadump, 1);
}

static int copy_space_cache(struct btrfs_root *root,
                            struct metadump_struct *metadump,
                            struct btrfs_path *path)
{
        struct extent_buffer *leaf;
        struct btrfs_file_extent_item *fi;
        struct btrfs_key key;
        u64 bytenr, num_bytes;
        int ret;

        root = root->fs_info->tree_root;

        key.objectid = 0;
        key.type = BTRFS_EXTENT_DATA_KEY;
        key.offset = 0;

        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0) {
                error("free space inode not found: %d", ret);
                return ret;
        }

        leaf = path->nodes[0];

        while (1) {
                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(root, path);
                        if (ret < 0) {
                                error("cannot go to next leaf %d", ret);
                                return ret;
                        }
                        if (ret > 0)
                                break;
                        leaf = path->nodes[0];
                }

                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                if (key.type != BTRFS_EXTENT_DATA_KEY) {
                        path->slots[0]++;
                        continue;
                }

                fi = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_file_extent_item);
                if (btrfs_file_extent_type(leaf, fi) !=
                    BTRFS_FILE_EXTENT_REG) {
                        path->slots[0]++;
                        continue;
                }

                bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
                num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
                ret = add_extent(bytenr, num_bytes, metadump, 1);
                if (ret) {
                        error("unable to add space cache blocks %d", ret);
                        btrfs_release_path(path);
                        return ret;
                }
                path->slots[0]++;
        }

        return 0;
}

static int copy_from_extent_tree(struct metadump_struct *metadump,
                                 struct btrfs_path *path)
{
        struct btrfs_root *extent_root;
        struct extent_buffer *leaf;
        struct btrfs_extent_item *ei;
        struct btrfs_key key;
        u64 bytenr;
        u64 num_bytes;
        int ret;

        extent_root = metadump->root->fs_info->extent_root;
        bytenr = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = 0;

        ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
        if (ret < 0) {
                error("extent root not found: %d", ret);
                return ret;
        }
        ret = 0;

        leaf = path->nodes[0];

        while (1) {
                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(extent_root, path);
                        if (ret < 0) {
                                error("cannot go to next leaf %d", ret);
                                break;
                        }
                        if (ret > 0) {
                                ret = 0;
                                break;
                        }
                        leaf = path->nodes[0];
                }

                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                if (key.objectid < bytenr ||
                    (key.type != BTRFS_EXTENT_ITEM_KEY &&
                     key.type != BTRFS_METADATA_ITEM_KEY)) {
                        path->slots[0]++;
                        continue;
                }

                bytenr = key.objectid;
                if (key.type == BTRFS_METADATA_ITEM_KEY) {
                        num_bytes = extent_root->fs_info->nodesize;
                } else {
                        num_bytes = key.offset;
                }

                if (num_bytes == 0) {
                        error("extent length 0 at bytenr %llu key type %d",
                                        (unsigned long long)bytenr, key.type);
                        ret = -EIO;
                        break;
                }

                if (btrfs_item_size_nr(leaf, path->slots[0]) > sizeof(*ei)) {
                        ei = btrfs_item_ptr(leaf, path->slots[0],
                                            struct btrfs_extent_item);
                        if (btrfs_extent_flags(leaf, ei) &
                            BTRFS_EXTENT_FLAG_TREE_BLOCK) {
                                ret = add_extent(bytenr, num_bytes, metadump,
                                                 0);
                                if (ret) {
                                        error("unable to add block %llu: %d",
                                                (unsigned long long)bytenr, ret);
                                        break;
                                }
                        }
                } else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
                        ret = is_tree_block(extent_root, path, bytenr);
                        if (ret < 0) {
                                error("failed to check tree block %llu: %d",
                                        (unsigned long long)bytenr, ret);
                                break;
                        }

                        if (ret) {
                                ret = add_extent(bytenr, num_bytes, metadump,
                                                 0);
                                if (ret) {
                                        error("unable to add block %llu: %d",
                                                (unsigned long long)bytenr, ret);
                                        break;
                                }
                        }
                        ret = 0;
#else
                        error(
        "either extent tree is corrupted or you haven't built with V0 support");
                        ret = -EIO;
                        break;
#endif
                }
                bytenr += num_bytes;
        }

        btrfs_release_path(path);

        return ret;
}

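/*
 * Top-level dump routine: open the filesystem, queue the super block,
 * collect metadata extents (by walking all trees, or from the extent
 * tree), then the log trees and free space cache, and flush everything
 * to @out.
 */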
static int create_metadump(const char *input, FILE *out, int num_threads,
                           int compress_level, enum sanitize_mode sanitize,
                           int walk_trees)
{
        struct btrfs_root *root;
        struct btrfs_path path;
        struct metadump_struct metadump;
        int ret;
        int err = 0;

        root = open_ctree(input, 0, 0);
        if (!root) {
                error("open ctree failed");
                return -EIO;
        }

        ret = metadump_init(&metadump, root, out, num_threads,
                            compress_level, sanitize);
        if (ret) {
                error("failed to initialize metadump: %d", ret);
                close_ctree(root);
                return ret;
        }

        ret = add_extent(BTRFS_SUPER_INFO_OFFSET, BTRFS_SUPER_INFO_SIZE,
                        &metadump, 0);
        if (ret) {
                error("unable to add metadata: %d", ret);
                err = ret;
                goto out;
        }

        btrfs_init_path(&path);

        if (walk_trees) {
                ret = copy_tree_blocks(root, root->fs_info->chunk_root->node,
                                       &metadump, 1);
                if (ret) {
                        err = ret;
                        goto out;
                }

                ret = copy_tree_blocks(root, root->fs_info->tree_root->node,
                                       &metadump, 1);
                if (ret) {
                        err = ret;
                        goto out;
                }
        } else {
                ret = copy_from_extent_tree(&metadump, &path);
                if (ret) {
                        err = ret;
                        goto out;
                }
        }

        ret = copy_log_trees(root, &metadump);
        if (ret) {
                err = ret;
                goto out;
        }

        ret = copy_space_cache(root, &metadump, &path);
out:
        ret = flush_pending(&metadump, 1);
        if (ret) {
                if (!err)
                        err = ret;
                error("failed to flush pending data: %d", ret);
        }

        metadump_destroy(&metadump, num_threads);

        btrfs_release_path(&path);
        ret = close_ctree(root);
        return err ? err : ret;
}

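/*
 * Rewrite the super block for the old restore format: set the metadump
 * flag and replace the sys chunk array with a single single-stripe
 * chunk mapping everything 1:1 onto the target device.
 */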
1481 static void update_super_old(u8 *buffer)
1482 {
1483         struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
1484         struct btrfs_chunk *chunk;
1485         struct btrfs_disk_key *key;
1486         u32 sectorsize = btrfs_super_sectorsize(super);
1487         u64 flags = btrfs_super_flags(super);
1488
1489         flags |= BTRFS_SUPER_FLAG_METADUMP;
1490         btrfs_set_super_flags(super, flags);
1491
1492         key = (struct btrfs_disk_key *)(super->sys_chunk_array);
1493         chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
1494                                        sizeof(struct btrfs_disk_key));
1495
1496         btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
1497         btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
1498         btrfs_set_disk_key_offset(key, 0);
1499
1500         btrfs_set_stack_chunk_length(chunk, (u64)-1);
1501         btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
1502         btrfs_set_stack_chunk_stripe_len(chunk, BTRFS_STRIPE_LEN);
1503         btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
1504         btrfs_set_stack_chunk_io_align(chunk, sectorsize);
1505         btrfs_set_stack_chunk_io_width(chunk, sectorsize);
1506         btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
1507         btrfs_set_stack_chunk_num_stripes(chunk, 1);
1508         btrfs_set_stack_chunk_sub_stripes(chunk, 0);
1509         chunk->stripe.devid = super->dev_item.devid;
1510         btrfs_set_stack_stripe_offset(&chunk->stripe, 0);
1511         memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
1512         btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
1513         csum_block(buffer, BTRFS_SUPER_INFO_SIZE);
1514 }
1515
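/*
 * Rewrite the sys_chunk_array of a restored super block so that every
 * chunk lives on this single device:
 *
 *   [disk_key][chunk + 1 stripe] ... ([chunk + 2 stripes] for DUP)
 *
 * Stripe offsets are remapped through logical_to_physical(), the super is
 * flagged METADUMP_V2 and the device count is forced to 1.
 */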
1516 static int update_super(struct mdrestore_struct *mdres, u8 *buffer)
1517 {
1518         struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
1519         struct btrfs_chunk *chunk;
1520         struct btrfs_disk_key *disk_key;
1521         struct btrfs_key key;
1522         u64 flags = btrfs_super_flags(super);
1523         u32 new_array_size = 0;
1524         u32 array_size;
1525         u32 cur = 0;
1526         u8 *ptr, *write_ptr;
1527         int old_num_stripes;
1528
1529         write_ptr = ptr = super->sys_chunk_array;
1530         array_size = btrfs_super_sys_array_size(super);
1531
1532         while (cur < array_size) {
1533                 disk_key = (struct btrfs_disk_key *)ptr;
1534                 btrfs_disk_key_to_cpu(&key, disk_key);
1535
1536                 new_array_size += sizeof(*disk_key);
1537                 memmove(write_ptr, ptr, sizeof(*disk_key));
1538
1539                 write_ptr += sizeof(*disk_key);
1540                 ptr += sizeof(*disk_key);
1541                 cur += sizeof(*disk_key);
1542
1543                 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
1544                         u64 type, physical, physical_dup, size = 0;
1545
1546                         chunk = (struct btrfs_chunk *)ptr;
1547                         old_num_stripes = btrfs_stack_chunk_num_stripes(chunk);
1548                         chunk = (struct btrfs_chunk *)write_ptr;
1549
1550                         memmove(write_ptr, ptr, sizeof(*chunk));
1551                         btrfs_set_stack_chunk_sub_stripes(chunk, 0);
1552                         type = btrfs_stack_chunk_type(chunk);
1553                         if (type & BTRFS_BLOCK_GROUP_DUP) {
1554                                 new_array_size += sizeof(struct btrfs_stripe);
1555                                 write_ptr += sizeof(struct btrfs_stripe);
1556                         } else {
1557                                 btrfs_set_stack_chunk_num_stripes(chunk, 1);
1558                                 btrfs_set_stack_chunk_type(chunk,
1559                                                 BTRFS_BLOCK_GROUP_SYSTEM);
1560                         }
1561                         chunk->stripe.devid = super->dev_item.devid;
1562                         physical = logical_to_physical(mdres, key.offset,
1563                                                        &size, &physical_dup);
1564                         if (size != (u64)-1)
1565                                 btrfs_set_stack_stripe_offset(&chunk->stripe,
1566                                                               physical);
1567                         memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid,
1568                                BTRFS_UUID_SIZE);
1569                         new_array_size += sizeof(*chunk);
1570                 } else {
1571                         error("bogus key type %d in sys_chunk_array", key.type);
1572                         return -EIO;
1573                 }
1574                 write_ptr += sizeof(*chunk);
1575                 ptr += btrfs_chunk_item_size(old_num_stripes);
1576                 cur += btrfs_chunk_item_size(old_num_stripes);
1577         }
1578
1579         if (mdres->clear_space_cache)
1580                 btrfs_set_super_cache_generation(super, 0);
1581
1582         flags |= BTRFS_SUPER_FLAG_METADUMP_V2;
1583         btrfs_set_super_flags(super, flags);
1584         btrfs_set_super_sys_array_size(super, new_array_size);
1585         btrfs_set_super_num_devices(super, 1);
1586         csum_block(buffer, BTRFS_SUPER_INFO_SIZE);
1587
1588         return 0;
1589 }
1590
1591 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size)
1592 {
1593         struct extent_buffer *eb;
1594
1595         eb = calloc(1, sizeof(struct extent_buffer) + size);
1596         if (!eb)
1597                 return NULL;
1598
1599         eb->start = bytenr;
1600         eb->len = size;
1601         return eb;
1602 }
1603
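/*
 * Shrink the item at @slot of a dummy extent buffer to @new_size bytes,
 * moving the data of this and all following items towards the end of the
 * leaf and adjusting their recorded offsets, a local analogue of the
 * kernel's btrfs_truncate_item().
 */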
1604 static void truncate_item(struct extent_buffer *eb, int slot, u32 new_size)
1605 {
1606         struct btrfs_item *item;
1607         u32 nritems;
1608         u32 old_size;
1609         u32 old_data_start;
1610         u32 size_diff;
1611         u32 data_end;
1612         int i;
1613
1614         old_size = btrfs_item_size_nr(eb, slot);
1615         if (old_size == new_size)
1616                 return;
1617
1618         nritems = btrfs_header_nritems(eb);
1619         data_end = btrfs_item_offset_nr(eb, nritems - 1);
1620
1621         old_data_start = btrfs_item_offset_nr(eb, slot);
1622         size_diff = old_size - new_size;
1623
1624         for (i = slot; i < nritems; i++) {
1625                 u32 ioff;
1626                 item = btrfs_item_nr(i);
1627                 ioff = btrfs_item_offset(eb, item);
1628                 btrfs_set_item_offset(eb, item, ioff + size_diff);
1629         }
1630
1631         memmove_extent_buffer(eb, btrfs_leaf_data(eb) + data_end + size_diff,
1632                               btrfs_leaf_data(eb) + data_end,
1633                               old_data_start + new_size - data_end);
1634         item = btrfs_item_nr(slot);
1635         btrfs_set_item_size(eb, item, new_size);
1636 }
1637
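/*
 * Rewrite any chunk tree leaves contained in a restored block: every chunk
 * item is reduced to a single stripe on the restore target (two stripes
 * for DUP), the RAID profile bits are masked out of the chunk type and the
 * stripe offsets are remapped through logical_to_physical().  Blocks that
 * do not belong to the chunk tree are passed through untouched.
 */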
1638 static int fixup_chunk_tree_block(struct mdrestore_struct *mdres,
1639                                   struct async_work *async, u8 *buffer,
1640                                   size_t size)
1641 {
1642         struct extent_buffer *eb;
1643         size_t size_left = size;
1644         u64 bytenr = async->start;
1645         int i;
1646
1647         if (size_left % mdres->nodesize)
1648                 return 0;
1649
1650         eb = alloc_dummy_eb(bytenr, mdres->nodesize);
1651         if (!eb)
1652                 return -ENOMEM;
1653
1654         while (size_left) {
1655                 eb->start = bytenr;
1656                 memcpy(eb->data, buffer, mdres->nodesize);
1657
1658                 if (btrfs_header_bytenr(eb) != bytenr)
1659                         break;
1660                 if (memcmp(mdres->fsid,
1661                            eb->data + offsetof(struct btrfs_header, fsid),
1662                            BTRFS_FSID_SIZE))
1663                         break;
1664
1665                 if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID)
1666                         goto next;
1667
1668                 if (btrfs_header_level(eb) != 0)
1669                         goto next;
1670
1671                 for (i = 0; i < btrfs_header_nritems(eb); i++) {
1672                         struct btrfs_chunk *chunk;
1673                         struct btrfs_key key;
1674                         u64 type, physical, physical_dup, size = (u64)-1;
1675
1676                         btrfs_item_key_to_cpu(eb, &key, i);
1677                         if (key.type != BTRFS_CHUNK_ITEM_KEY)
1678                                 continue;
1679
1680                         size = 0;
1681                         physical = logical_to_physical(mdres, key.offset,
1682                                                        &size, &physical_dup);
1683
1684                         if (!physical_dup)
1685                                 truncate_item(eb, i, sizeof(*chunk));
1686                         chunk = btrfs_item_ptr(eb, i, struct btrfs_chunk);
1687 
1689                         /* Zero out the RAID profile */
1690                         type = btrfs_chunk_type(eb, chunk);
1691                         type &= (BTRFS_BLOCK_GROUP_DATA |
1692                                  BTRFS_BLOCK_GROUP_SYSTEM |
1693                                  BTRFS_BLOCK_GROUP_METADATA |
1694                                  BTRFS_BLOCK_GROUP_DUP);
1695                         btrfs_set_chunk_type(eb, chunk, type);
1696
1697                         if (!physical_dup)
1698                                 btrfs_set_chunk_num_stripes(eb, chunk, 1);
1699                         btrfs_set_chunk_sub_stripes(eb, chunk, 0);
1700                         btrfs_set_stripe_devid_nr(eb, chunk, 0, mdres->devid);
1701                         if (size != (u64)-1)
1702                                 btrfs_set_stripe_offset_nr(eb, chunk, 0,
1703                                                            physical);
1704                         /* update stripe 2 offset */
1705                         if (physical_dup)
1706                                 btrfs_set_stripe_offset_nr(eb, chunk, 1,
1707                                                            physical_dup);
1708
1709                         write_extent_buffer(eb, mdres->uuid,
1710                                         (unsigned long)btrfs_stripe_dev_uuid_nr(
1711                                                 chunk, 0),
1712                                         BTRFS_UUID_SIZE);
1713                 }
1714                 memcpy(buffer, eb->data, eb->len);
1715                 csum_block(buffer, eb->len);
1716 next:
1717                 size_left -= mdres->nodesize;
1718                 buffer += mdres->nodesize;
1719                 bytenr += mdres->nodesize;
1720         }
1721
1722         free(eb);
1723         return 0;
1724 }
1725
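/*
 * Copy the primary super block in @buf to every backup mirror location
 * that fits on the target device, adjusting bytenr and checksum for each
 * copy.  Failures are reported but not treated as fatal.
 */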
1726 static void write_backup_supers(int fd, u8 *buf)
1727 {
1728         struct btrfs_super_block *super = (struct btrfs_super_block *)buf;
1729         struct stat st;
1730         u64 size;
1731         u64 bytenr;
1732         int i;
1733         int ret;
1734
1735         if (fstat(fd, &st)) {
1736                 error(
1737         "cannot stat restore point, won't be able to write backup supers: %s",
1738                         strerror(errno));
1739                 return;
1740         }
1741
1742         size = btrfs_device_size(fd, &st);
1743
1744         for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1745                 bytenr = btrfs_sb_offset(i);
1746                 if (bytenr + BTRFS_SUPER_INFO_SIZE > size)
1747                         break;
1748                 btrfs_set_super_bytenr(super, bytenr);
1749                 csum_block(buf, BTRFS_SUPER_INFO_SIZE);
1750                 ret = pwrite64(fd, buf, BTRFS_SUPER_INFO_SIZE, bytenr);
1751                 if (ret < BTRFS_SUPER_INFO_SIZE) {
1752                         if (ret < 0)
1753                                 error(
1754                                 "problem writing out backup super block %d: %s",
1755                                                 i, strerror(errno));
1756                         else
1757                                 error("short write writing out backup super block");
1758                         break;
1759                 }
1760         }
1761 }
1762
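/*
 * Restore worker thread: take items off mdres->list, decompress them if
 * the image is zlib compressed, apply the super block and chunk tree
 * fixups where needed and write everything to its (possibly remapped)
 * physical location on the target.
 */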
1763 static void *restore_worker(void *data)
1764 {
1765         struct mdrestore_struct *mdres = (struct mdrestore_struct *)data;
1766         struct async_work *async;
1767         size_t size;
1768         u8 *buffer;
1769         u8 *outbuf;
1770         int outfd;
1771         int ret;
1772         int compress_size = MAX_PENDING_SIZE * 4;
1773
1774         outfd = fileno(mdres->out);
1775         buffer = malloc(compress_size);
1776         if (!buffer) {
1777                 error("not enough memory for restore worker buffer");
1778                 pthread_mutex_lock(&mdres->mutex);
1779                 if (!mdres->error)
1780                         mdres->error = -ENOMEM;
1781                 pthread_mutex_unlock(&mdres->mutex);
1782                 pthread_exit(NULL);
1783         }
1784
1785         while (1) {
1786                 u64 bytenr, physical_dup;
1787                 off_t offset = 0;
1788                 int err = 0;
1789
1790                 pthread_mutex_lock(&mdres->mutex);
1791                 while (!mdres->nodesize || list_empty(&mdres->list)) {
1792                         if (mdres->done) {
1793                                 pthread_mutex_unlock(&mdres->mutex);
1794                                 goto out;
1795                         }
1796                         pthread_cond_wait(&mdres->cond, &mdres->mutex);
1797                 }
1798                 async = list_entry(mdres->list.next, struct async_work, list);
1799                 list_del_init(&async->list);
1800
1801                 if (mdres->compress_method == COMPRESS_ZLIB) {
1802                         size = compress_size;
1803                         pthread_mutex_unlock(&mdres->mutex);
1804                         ret = uncompress(buffer, (unsigned long *)&size,
1805                                          async->buffer, async->bufsize);
1806                         pthread_mutex_lock(&mdres->mutex);
1807                         if (ret != Z_OK) {
1808                                 error("decompression failed with %d", ret);
1809                                 err = -EIO;
1810                         }
1811                         outbuf = buffer;
1812                 } else {
1813                         outbuf = async->buffer;
1814                         size = async->bufsize;
1815                 }
1816
1817                 if (!mdres->multi_devices) {
1818                         if (async->start == BTRFS_SUPER_INFO_OFFSET) {
1819                                 if (mdres->old_restore) {
1820                                         update_super_old(outbuf);
1821                                 } else {
1822                                         ret = update_super(mdres, outbuf);
1823                                         if (ret)
1824                                                 err = ret;
1825                                 }
1826                         } else if (!mdres->old_restore) {
1827                                 ret = fixup_chunk_tree_block(mdres, async,
                                                             outbuf, size);
1828                                 if (ret)
1829                                         err = ret;
1830                         }
1831                 }
1832
1833                 if (!mdres->fixup_offset) {
1834                         while (size) {
1835                                 u64 chunk_size = size;
1836                                 physical_dup = 0;
1837                                 if (!mdres->multi_devices && !mdres->old_restore)
1838                                         bytenr = logical_to_physical(mdres,
1839                                                      async->start + offset,
1840                                                      &chunk_size,
1841                                                      &physical_dup);
1842                                 else
1843                                         bytenr = async->start + offset;
1844
1845                                 ret = pwrite64(outfd, outbuf+offset, chunk_size,
1846                                                bytenr);
1847                                 if (ret != chunk_size)
1848                                         goto error;
1849
1850                                 if (physical_dup)
1851                                         ret = pwrite64(outfd, outbuf+offset,
1852                                                        chunk_size,
1853                                                        physical_dup);
1854                                 if (ret != chunk_size)
1855                                         goto error;
1856
1857                                 size -= chunk_size;
1858                                 offset += chunk_size;
1859                                 continue;
1860
1861 error:
1862                                 if (ret < 0) {
1863                                         error("unable to write to device: %s",
1864                                                         strerror(errno));
1865                                         err = -errno;
1866                                 } else {
1867                                         error("short write");
1868                                         err = -EIO;
1869                                 }
1870                         }
1871                 } else if (async->start != BTRFS_SUPER_INFO_OFFSET) {
1872                         ret = write_data_to_disk(mdres->info, outbuf,
                                                 async->start, size, 0);
1873                         if (ret) {
1874                                 error("failed to write data");
1875                                 exit(1);
1876                         }
1877                 }
1878 
1880                 /* backup super blocks are already there at fixup_offset stage */
1881                 if (!mdres->multi_devices && async->start == BTRFS_SUPER_INFO_OFFSET)
1882                         write_backup_supers(outfd, outbuf);
1883
1884                 if (err && !mdres->error)
1885                         mdres->error = err;
1886                 mdres->num_items--;
1887                 pthread_mutex_unlock(&mdres->mutex);
1888
1889                 free(async->buffer);
1890                 free(async);
1891         }
1892 out:
1893         free(buffer);
1894         pthread_exit(NULL);
1895 }
1896
1897 static void mdrestore_destroy(struct mdrestore_struct *mdres, int num_threads)
1898 {
1899         struct rb_node *n;
1900         int i;
1901
1902         while ((n = rb_first(&mdres->chunk_tree))) {
1903                 struct fs_chunk *entry;
1904
1905                 entry = rb_entry(n, struct fs_chunk, l);
1906                 rb_erase(n, &mdres->chunk_tree);
1907                 rb_erase(&entry->p, &mdres->physical_tree);
1908                 free(entry);
1909         }
1910         pthread_mutex_lock(&mdres->mutex);
1911         mdres->done = 1;
1912         pthread_cond_broadcast(&mdres->cond);
1913         pthread_mutex_unlock(&mdres->mutex);
1914
1915         for (i = 0; i < num_threads; i++)
1916                 pthread_join(mdres->threads[i], NULL);
1917
1918         pthread_cond_destroy(&mdres->cond);
1919         pthread_mutex_destroy(&mdres->mutex);
1920 }
1921
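/*
 * Initialize the restore state in @mdres and, if @num_threads is non-zero,
 * start that many restore_worker threads.  If creating a thread fails, the
 * threads started so far are torn down again.
 */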
1922 static int mdrestore_init(struct mdrestore_struct *mdres,
1923                           FILE *in, FILE *out, int old_restore,
1924                           int num_threads, int fixup_offset,
1925                           struct btrfs_fs_info *info, int multi_devices)
1926 {
1927         int i, ret = 0;
1928
1929         memset(mdres, 0, sizeof(*mdres));
1930         pthread_cond_init(&mdres->cond, NULL);
1931         pthread_mutex_init(&mdres->mutex, NULL);
1932         INIT_LIST_HEAD(&mdres->list);
1933         INIT_LIST_HEAD(&mdres->overlapping_chunks);
1934         mdres->in = in;
1935         mdres->out = out;
1936         mdres->old_restore = old_restore;
1937         mdres->chunk_tree.rb_node = NULL;
1938         mdres->fixup_offset = fixup_offset;
1939         mdres->info = info;
1940         mdres->multi_devices = multi_devices;
1941         mdres->clear_space_cache = 0;
1942         mdres->last_physical_offset = 0;
1943         mdres->alloced_chunks = 0;
1944
1945         if (!num_threads)
1946                 return 0;
1947
1948         mdres->num_threads = num_threads;
1949         for (i = 0; i < num_threads; i++) {
1950                 ret = pthread_create(&mdres->threads[i], NULL, restore_worker,
1951                                      mdres);
1952                 if (ret) {
1953                         /* pthread_create returns errno directly */
1954                         ret = -ret;
1955                         break;
1956                 }
1957         }
1958         if (ret)
1959                 mdrestore_destroy(mdres, i + 1);
1960         return ret;
1961 }
1962
1963 static int fill_mdres_info(struct mdrestore_struct *mdres,
1964                            struct async_work *async)
1965 {
1966         struct btrfs_super_block *super;
1967         u8 *buffer = NULL;
1968         u8 *outbuf;
1969         int ret;
1970
1971         /* We've already been initialized */
1972         if (mdres->nodesize)
1973                 return 0;
1974
1975         if (mdres->compress_method == COMPRESS_ZLIB) {
1976                 size_t size = MAX_PENDING_SIZE * 2;
1977
1978                 buffer = malloc(MAX_PENDING_SIZE * 2);
1979                 if (!buffer)
1980                         return -ENOMEM;
1981                 ret = uncompress(buffer, (unsigned long *)&size,
1982                                  async->buffer, async->bufsize);
1983                 if (ret != Z_OK) {
1984                         error("decompression failed with %d", ret);
1985                         free(buffer);
1986                         return -EIO;
1987                 }
1988                 outbuf = buffer;
1989         } else {
1990                 outbuf = async->buffer;
1991         }
1992
1993         super = (struct btrfs_super_block *)outbuf;
1994         mdres->nodesize = btrfs_super_nodesize(super);
1995         memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
1996         memcpy(mdres->uuid, super->dev_item.uuid,
1997                        BTRFS_UUID_SIZE);
1998         mdres->devid = le64_to_cpu(super->dev_item.devid);
1999         free(buffer);
2000         return 0;
2001 }
2002
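/*
 * Queue all items of one cluster for the workers.  The item payloads
 * follow the cluster header back to back in the stream; the super block
 * item additionally primes the restore state via fill_mdres_info().  The
 * stream offset of the next cluster (after padding to BLOCK_SIZE) is
 * returned in @next.
 */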
2003 static int add_cluster(struct meta_cluster *cluster,
2004                        struct mdrestore_struct *mdres, u64 *next)
2005 {
2006         struct meta_cluster_item *item;
2007         struct meta_cluster_header *header = &cluster->header;
2008         struct async_work *async;
2009         u64 bytenr;
2010         u32 i, nritems;
2011         int ret;
2012
2013         pthread_mutex_lock(&mdres->mutex);
2014         mdres->compress_method = header->compress;
2015         pthread_mutex_unlock(&mdres->mutex);
2016
2017         bytenr = le64_to_cpu(header->bytenr) + BLOCK_SIZE;
2018         nritems = le32_to_cpu(header->nritems);
2019         for (i = 0; i < nritems; i++) {
2020                 item = &cluster->items[i];
2021                 async = calloc(1, sizeof(*async));
2022                 if (!async) {
2023                         error("not enough memory for async data");
2024                         return -ENOMEM;
2025                 }
2026                 async->start = le64_to_cpu(item->bytenr);
2027                 async->bufsize = le32_to_cpu(item->size);
2028                 async->buffer = malloc(async->bufsize);
2029                 if (!async->buffer) {
2030                         error("not enough memory for async buffer");
2031                         free(async);
2032                         return -ENOMEM;
2033                 }
2034                 ret = fread(async->buffer, async->bufsize, 1, mdres->in);
2035                 if (ret != 1) {
2036                         error("unable to read buffer: %s", strerror(errno));
2037                         free(async->buffer);
2038                         free(async);
2039                         return -EIO;
2040                 }
2041                 bytenr += async->bufsize;
2042
2043                 pthread_mutex_lock(&mdres->mutex);
2044                 if (async->start == BTRFS_SUPER_INFO_OFFSET) {
2045                         ret = fill_mdres_info(mdres, async);
2046                         if (ret) {
2047                                 error("unable to set up restore state");
2048                                 pthread_mutex_unlock(&mdres->mutex);
2049                                 free(async->buffer);
2050                                 free(async);
2051                                 return ret;
2052                         }
2053                 }
2054                 list_add_tail(&async->list, &mdres->list);
2055                 mdres->num_items++;
2056                 pthread_cond_signal(&mdres->cond);
2057                 pthread_mutex_unlock(&mdres->mutex);
2058         }
2059         if (bytenr & BLOCK_MASK) {
2060                 char buffer[BLOCK_MASK];
2061                 size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);
2062
2063                 bytenr += size;
2064                 ret = fread(buffer, size, 1, mdres->in);
2065                 if (ret != 1) {
2066                         error("failed to read buffer: %s", strerror(errno));
2067                         return -EIO;
2068                 }
2069         }
2070         *next = bytenr;
2071         return 0;
2072 }
2073
2074 static int wait_for_worker(struct mdrestore_struct *mdres)
2075 {
2076         int ret = 0;
2077
2078         pthread_mutex_lock(&mdres->mutex);
2079         ret = mdres->error;
2080         while (!ret && mdres->num_items > 0) {
2081                 struct timespec ts = {
2082                         .tv_sec = 0,
2083                         .tv_nsec = 10000000,
2084                 };
2085                 pthread_mutex_unlock(&mdres->mutex);
2086                 nanosleep(&ts, NULL);
2087                 pthread_mutex_lock(&mdres->mutex);
2088                 ret = mdres->error;
2089         }
2090         pthread_mutex_unlock(&mdres->mutex);
2091         return ret;
2092 }
2093
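/*
 * Parse one chunk tree block located at @bytenr inside a decompressed item
 * buffer and record every chunk item in the logical->physical mapping
 * trees.  Interior nodes recurse through search_for_chunk_blocks(); chunks
 * whose physical range overlaps an already inserted chunk are parked on
 * the overlapping_chunks list for later remapping.
 */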
2094 static int read_chunk_block(struct mdrestore_struct *mdres, u8 *buffer,
2095                             u64 bytenr, u64 item_bytenr, u32 bufsize,
2096                             u64 cluster_bytenr)
2097 {
2098         struct extent_buffer *eb;
2099         int ret = 0;
2100         int i;
2101
2102         eb = alloc_dummy_eb(bytenr, mdres->nodesize);
2103         if (!eb) {
2104                 ret = -ENOMEM;
2105                 goto out;
2106         }
2107
2108         while (item_bytenr != bytenr) {
2109                 buffer += mdres->nodesize;
2110                 item_bytenr += mdres->nodesize;
2111         }
2112
2113         memcpy(eb->data, buffer, mdres->nodesize);
2114         if (btrfs_header_bytenr(eb) != bytenr) {
2115                 error("eb bytenr does not match expected bytenr: %llu != %llu",
2116                                 (unsigned long long)btrfs_header_bytenr(eb),
2117                                 (unsigned long long)bytenr);
2118                 ret = -EIO;
2119                 goto out;
2120         }
2121
2122         if (memcmp(mdres->fsid, eb->data + offsetof(struct btrfs_header, fsid),
2123                    BTRFS_FSID_SIZE)) {
2124                 error("filesystem UUID of eb %llu does not match",
2125                                 (unsigned long long)bytenr);
2126                 ret = -EIO;
2127                 goto out;
2128         }
2129
2130         if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID) {
2131                 error("wrong eb %llu owner %llu",
2132                                 (unsigned long long)bytenr,
2133                                 (unsigned long long)btrfs_header_owner(eb));
2134                 ret = -EIO;
2135                 goto out;
2136         }
2137
2138         for (i = 0; i < btrfs_header_nritems(eb); i++) {
2139                 struct btrfs_chunk *chunk;
2140                 struct fs_chunk *fs_chunk;
2141                 struct btrfs_key key;
2142                 u64 type;
2143
2144                 if (btrfs_header_level(eb)) {
2145                         u64 blockptr = btrfs_node_blockptr(eb, i);
2146
2147                         ret = search_for_chunk_blocks(mdres, blockptr,
2148                                                       cluster_bytenr);
2149                         if (ret)
2150                                 break;
2151                         continue;
2152                 }
2153
2154                 /* Yay a leaf!  We loves leafs! */
2155                 btrfs_item_key_to_cpu(eb, &key, i);
2156                 if (key.type != BTRFS_CHUNK_ITEM_KEY)
2157                         continue;
2158
2159                 fs_chunk = malloc(sizeof(struct fs_chunk));
2160                 if (!fs_chunk) {
2161                         error("not enough memory to allocate chunk");
2162                         ret = -ENOMEM;
2163                         break;
2164                 }
2165                 memset(fs_chunk, 0, sizeof(*fs_chunk));
2166                 chunk = btrfs_item_ptr(eb, i, struct btrfs_chunk);
2167
2168                 fs_chunk->logical = key.offset;
2169                 fs_chunk->physical = btrfs_stripe_offset_nr(eb, chunk, 0);
2170                 fs_chunk->bytes = btrfs_chunk_length(eb, chunk);
2171                 INIT_LIST_HEAD(&fs_chunk->list);
2172                 if (tree_search(&mdres->physical_tree, &fs_chunk->p,
2173                                 physical_cmp, 1) != NULL)
2174                         list_add(&fs_chunk->list, &mdres->overlapping_chunks);
2175                 else
2176                         tree_insert(&mdres->physical_tree, &fs_chunk->p,
2177                                     physical_cmp);
2178
2179                 type = btrfs_chunk_type(eb, chunk);
2180                 if (type & BTRFS_BLOCK_GROUP_DUP) {
2181                         fs_chunk->physical_dup =
2182                                         btrfs_stripe_offset_nr(eb, chunk, 1);
2183                 }
2184
2185                 if (fs_chunk->physical_dup + fs_chunk->bytes >
2186                     mdres->last_physical_offset)
2187                         mdres->last_physical_offset = fs_chunk->physical_dup +
2188                                 fs_chunk->bytes;
2189                 else if (fs_chunk->physical + fs_chunk->bytes >
2190                     mdres->last_physical_offset)
2191                         mdres->last_physical_offset = fs_chunk->physical +
2192                                 fs_chunk->bytes;
2193                 mdres->alloced_chunks += fs_chunk->bytes;
2194                 /* a DUP chunk occupies two physical ranges, so count its bytes twice */
2195                 if (fs_chunk->physical_dup)
2196                         mdres->alloced_chunks += fs_chunk->bytes;
2197                 tree_insert(&mdres->chunk_tree, &fs_chunk->l, chunk_cmp);
2198         }
2199 out:
2200         free(eb);
2201         return ret;
2202 }
2203
2204 /* If you have to ask you aren't worthy */
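/*
 * Walk the image cluster by cluster, starting at @cluster_bytenr and
 * wrapping around to the start of the file once, until the item covering
 * the chunk tree block at logical address @search is found, then feed that
 * block to read_chunk_block().
 */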
2205 static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
2206                                    u64 search, u64 cluster_bytenr)
2207 {
2208         struct meta_cluster *cluster;
2209         struct meta_cluster_header *header;
2210         struct meta_cluster_item *item;
2211         u64 current_cluster = cluster_bytenr, bytenr;
2212         u64 item_bytenr;
2213         u32 bufsize, nritems, i;
2214         u32 max_size = MAX_PENDING_SIZE * 2;
2215         u8 *buffer, *tmp = NULL;
2216         int ret = 0;
2217
2218         cluster = malloc(BLOCK_SIZE);
2219         if (!cluster) {
2220                 error("not enough memory for cluster");
2221                 return -ENOMEM;
2222         }
2223
2224         buffer = malloc(max_size);
2225         if (!buffer) {
2226                 error("not enough memory for buffer");
2227                 free(cluster);
2228                 return -ENOMEM;
2229         }
2230
2231         if (mdres->compress_method == COMPRESS_ZLIB) {
2232                 tmp = malloc(max_size);
2233                 if (!tmp) {
2234                         error("not enough memory for buffer");
2235                         free(cluster);
2236                         free(buffer);
2237                         return -ENOMEM;
2238                 }
2239         }
2240
2241         bytenr = current_cluster;
2242         while (1) {
2243                 if (fseek(mdres->in, current_cluster, SEEK_SET)) {
2244                         error("seek failed: %s", strerror(errno));
2245                         ret = -EIO;
2246                         break;
2247                 }
2248
2249                 ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
2250                 if (ret == 0) {
2251                         if (cluster_bytenr != 0) {
2252                                 cluster_bytenr = 0;
2253                                 current_cluster = 0;
2254                                 bytenr = 0;
2255                                 continue;
2256                         }
2257                         error(
2258         "unknown state after reading cluster at %llu, probably corrupted data",
2259                                         cluster_bytenr);
2260                         ret = -EIO;
2261                         break;
2262                 } else if (ret < 0) {
2263                         error("unable to read image at %llu: %s",
2264                                         (unsigned long long)cluster_bytenr,
2265                                         strerror(errno));
2266                         break;
2267                 }
2268                 ret = 0;
2269
2270                 header = &cluster->header;
2271                 if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
2272                     le64_to_cpu(header->bytenr) != current_cluster) {
2273                         error("bad header in metadump image");
2274                         ret = -EIO;
2275                         break;
2276                 }
2277
2278                 bytenr += BLOCK_SIZE;
2279                 nritems = le32_to_cpu(header->nritems);
2280                 for (i = 0; i < nritems; i++) {
2281                         size_t size;
2282
2283                         item = &cluster->items[i];
2284                         bufsize = le32_to_cpu(item->size);
2285                         item_bytenr = le64_to_cpu(item->bytenr);
2286
2287                         if (bufsize > max_size) {
2288                                 error("item %u too big: %u > %u", i, bufsize,
2289                                                 max_size);
2290                                 ret = -EIO;
2291                                 break;
2292                         }
2293
2294                         if (mdres->compress_method == COMPRESS_ZLIB) {
2295                                 ret = fread(tmp, bufsize, 1, mdres->in);
2296                                 if (ret != 1) {
2297                                         error("read error: %s", strerror(errno));
2298                                         ret = -EIO;
2299                                         break;
2300                                 }
2301
2302                                 size = max_size;
2303                                 ret = uncompress(buffer,
2304                                                  (unsigned long *)&size, tmp,
2305                                                  bufsize);
2306                                 if (ret != Z_OK) {
2307                                         error("decompression failed with %d",
2308                                                         ret);
2309                                         ret = -EIO;
2310                                         break;
2311                                 }
2312                         } else {
2313                                 ret = fread(buffer, bufsize, 1, mdres->in);
2314                                 if (ret != 1) {
2315                                         error("read error: %s",
2316                                                         strerror(errno));
2317                                         ret = -EIO;
2318                                         break;
2319                                 }
2320                                 size = bufsize;
2321                         }
2322                         ret = 0;
2323
2324                         if (item_bytenr <= search &&
2325                             item_bytenr + size > search) {
2326                                 ret = read_chunk_block(mdres, buffer, search,
2327                                                        item_bytenr, size,
2328                                                        current_cluster);
2329                                 if (!ret)
2330                                         ret = 1;
2331                                 break;
2332                         }
2333                         bytenr += bufsize;
2334                 }
2335                 if (ret) {
2336                         if (ret > 0)
2337                                 ret = 0;
2338                         break;
2339                 }
2340                 if (bytenr & BLOCK_MASK)
2341                         bytenr += BLOCK_SIZE - (bytenr & BLOCK_MASK);
2342                 current_cluster = bytenr;
2343         }
2344
2345         free(tmp);
2346         free(buffer);
2347         free(cluster);
2348         return ret;
2349 }
2350
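/*
 * Bootstrap the restore: find the super block item in the first cluster,
 * read the chunk root pointer from it and recover the complete chunk tree
 * from the image via search_for_chunk_blocks().  Skipped for stdin, which
 * cannot seek backwards.
 */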
2351 static int build_chunk_tree(struct mdrestore_struct *mdres,
2352                             struct meta_cluster *cluster)
2353 {
2354         struct btrfs_super_block *super;
2355         struct meta_cluster_header *header;
2356         struct meta_cluster_item *item = NULL;
2357         u64 chunk_root_bytenr = 0;
2358         u32 i, nritems;
2359         u64 bytenr = 0;
2360         u8 *buffer;
2361         int ret;
2362
2363         /* We can't seek with stdin so don't bother doing this */
2364         if (mdres->in == stdin)
2365                 return 0;
2366
2367         ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
2368         if (ret <= 0) {
2369                 error("unable to read cluster: %s", strerror(errno));
2370                 return -EIO;
2371         }
2372         ret = 0;
2373
2374         header = &cluster->header;
2375         if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
2376             le64_to_cpu(header->bytenr) != 0) {
2377                 error("bad header in metadump image");
2378                 return -EIO;
2379         }
2380
2381         bytenr += BLOCK_SIZE;
2382         mdres->compress_method = header->compress;
2383         nritems = le32_to_cpu(header->nritems);
2384         for (i = 0; i < nritems; i++) {
2385                 item = &cluster->items[i];
2386
2387                 if (le64_to_cpu(item->bytenr) == BTRFS_SUPER_INFO_OFFSET)
2388                         break;
2389                 bytenr += le32_to_cpu(item->size);
2390                 if (fseek(mdres->in, le32_to_cpu(item->size), SEEK_CUR)) {
2391                         error("seek failed: %s", strerror(errno));
2392                         return -EIO;
2393                 }
2394         }
2395
2396         if (!item || le64_to_cpu(item->bytenr) != BTRFS_SUPER_INFO_OFFSET) {
2397                 error("did not find superblock at %llu",
2398                                 (unsigned long long)BTRFS_SUPER_INFO_OFFSET);
2399                 return -EINVAL;
2400         }
2401
2402         buffer = malloc(le32_to_cpu(item->size));
2403         if (!buffer) {
2404                 error("not enough memory to allocate buffer");
2405                 return -ENOMEM;
2406         }
2407
2408         ret = fread(buffer, le32_to_cpu(item->size), 1, mdres->in);
2409         if (ret != 1) {
2410                 error("unable to read buffer: %s", strerror(errno));
2411                 free(buffer);
2412                 return -EIO;
2413         }
2414
2415         if (mdres->compress_method == COMPRESS_ZLIB) {
2416                 size_t size = MAX_PENDING_SIZE * 2;
2417                 u8 *tmp;
2418
2419                 tmp = malloc(MAX_PENDING_SIZE * 2);
2420                 if (!tmp) {
2421                         free(buffer);
2422                         return -ENOMEM;
2423                 }
2424                 ret = uncompress(tmp, (unsigned long *)&size,
2425                                  buffer, le32_to_cpu(item->size));
2426                 if (ret != Z_OK) {
2427                         error("decompression failed with %d", ret);
2428                         free(buffer);
2429                         free(tmp);
2430                         return -EIO;
2431                 }
2432                 free(buffer);
2433                 buffer = tmp;
2434         }
2435
2436         pthread_mutex_lock(&mdres->mutex);
2437         super = (struct btrfs_super_block *)buffer;
2438         chunk_root_bytenr = btrfs_super_chunk_root(super);
2439         mdres->nodesize = btrfs_super_nodesize(super);
2440         memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
2441         memcpy(mdres->uuid, super->dev_item.uuid,
2442                        BTRFS_UUID_SIZE);
2443         mdres->devid = le64_to_cpu(super->dev_item.devid);
2444         free(buffer);
2445         pthread_mutex_unlock(&mdres->mutex);
2446
2447         return search_for_chunk_blocks(mdres, chunk_root_bytenr, 0);
2448 }
2449
2450 static int range_contains_super(u64 physical, u64 bytes)
2451 {
2452         u64 super_bytenr;
2453         int i;
2454
2455         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2456                 super_bytenr = btrfs_sb_offset(i);
2457                 if (super_bytenr >= physical &&
2458                     super_bytenr < physical + bytes)
2459                         return 1;
2460         }
2461
2462         return 0;
2463 }
2464
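/*
 * Move each chunk whose physical range collided with another chunk to a
 * fresh location past the highest physical offset seen so far.  If a
 * remapped chunk contained a super block mirror, the space cache is
 * invalidated because its contents no longer match the new layout.
 */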
2465 static void remap_overlapping_chunks(struct mdrestore_struct *mdres)
2466 {
2467         struct fs_chunk *fs_chunk;
2468
2469         while (!list_empty(&mdres->overlapping_chunks)) {
2470                 fs_chunk = list_first_entry(&mdres->overlapping_chunks,
2471                                             struct fs_chunk, list);
2472                 list_del_init(&fs_chunk->list);
2473                 if (range_contains_super(fs_chunk->physical,
2474                                          fs_chunk->bytes)) {
2475                         warning(
2476 "remapping a chunk that had a super mirror inside of it, clearing space cache so we don't end up with corruption");
2477                         mdres->clear_space_cache = 1;
2478                 }
2479                 fs_chunk->physical = mdres->last_physical_offset;
2480                 tree_insert(&mdres->physical_tree, &fs_chunk->p, physical_cmp);
2481                 mdres->last_physical_offset += fs_chunk->bytes;
2482         }
2483 }
2484
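/*
 * After the image data has been written out, rewrite the device items in
 * the chunk tree: delete every device except the restore target and update
 * the target's total and used byte counts for the single-device layout.
 */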
2485 static int fixup_devices(struct btrfs_fs_info *fs_info,
2486                          struct mdrestore_struct *mdres, off_t dev_size)
2487 {
2488         struct btrfs_trans_handle *trans;
2489         struct btrfs_dev_item *dev_item;
2490         struct btrfs_path path;
2491         struct extent_buffer *leaf;
2492         struct btrfs_root *root = fs_info->chunk_root;
2493         struct btrfs_key key;
2494         u64 devid, cur_devid;
2495         int ret;
2496
2497         trans = btrfs_start_transaction(fs_info->tree_root, 1);
2498         if (IS_ERR(trans)) {
2499                 error("cannot start transaction: %ld", PTR_ERR(trans));
2500                 return PTR_ERR(trans);
2501         }
2502
2503         dev_item = &fs_info->super_copy->dev_item;
2504
2505         devid = btrfs_stack_device_id(dev_item);
2506
2507         btrfs_set_stack_device_total_bytes(dev_item, dev_size);
2508         btrfs_set_stack_device_bytes_used(dev_item, mdres->alloced_chunks);
2509
2510         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2511         key.type = BTRFS_DEV_ITEM_KEY;
2512         key.offset = 0;
2513
2514         btrfs_init_path(&path);
2515
2516 again:
2517         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
2518         if (ret < 0) {
2519                 error("search failed: %d", ret);
2520                 exit(1);
2521         }
2522
2523         while (1) {
2524                 leaf = path.nodes[0];
2525                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2526                         ret = btrfs_next_leaf(root, &path);
2527                         if (ret < 0) {
2528                                 error("cannot go to next leaf %d", ret);
2529                                 exit(1);
2530                         }
2531                         if (ret > 0) {
2532                                 ret = 0;
2533                                 break;
2534                         }
2535                         leaf = path.nodes[0];
2536                 }
2537
2538                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2539                 if (key.type > BTRFS_DEV_ITEM_KEY)
2540                         break;
2541                 if (key.type != BTRFS_DEV_ITEM_KEY) {
2542                         path.slots[0]++;
2543                         continue;
2544                 }
2545
2546                 dev_item = btrfs_item_ptr(leaf, path.slots[0],
2547                                           struct btrfs_dev_item);
2548                 cur_devid = btrfs_device_id(leaf, dev_item);
2549                 if (devid != cur_devid) {
2550                         ret = btrfs_del_item(trans, root, &path);
2551                         if (ret) {
2552                                 error("cannot delete item: %d", ret);
2553                                 exit(1);
2554                         }
2555                         btrfs_release_path(&path);
2556                         goto again;
2557                 }
2558
2559                 btrfs_set_device_total_bytes(leaf, dev_item, dev_size);
2560                 btrfs_set_device_bytes_used(leaf, dev_item,
2561                                             mdres->alloced_chunks);
2562                 btrfs_mark_buffer_dirty(leaf);
2563                 path.slots[0]++;
2564         }
2565
2566         btrfs_release_path(&path);
2567         ret = btrfs_commit_transaction(trans, fs_info->tree_root);
2568         if (ret) {
2569                 error("unable to commit transaction: %d", ret);
2570                 return ret;
2571         }
2572         return 0;
2573 }
2574
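/*
 * Restore mode entry point: open the image (or stdin) and feed every
 * cluster to the worker threads.  For a regular (non -o, non -m) restore
 * the chunk tree mapping is rebuilt first and the device items are fixed
 * up afterwards to match the size of the target.
 */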
2575 static int restore_metadump(const char *input, FILE *out, int old_restore,
2576                             int num_threads, int fixup_offset,
2577                             const char *target, int multi_devices)
2578 {
2579         struct meta_cluster *cluster = NULL;
2580         struct meta_cluster_header *header;
2581         struct mdrestore_struct mdrestore;
2582         struct btrfs_fs_info *info = NULL;
2583         u64 bytenr = 0;
2584         FILE *in = NULL;
2585         int ret = 0;
2586
2587         if (!strcmp(input, "-")) {
2588                 in = stdin;
2589         } else {
2590                 in = fopen(input, "r");
2591                 if (!in) {
2592                         error("unable to open metadump image: %s",
2593                                         strerror(errno));
2594                         return 1;
2595                 }
2596         }
2597
2598         /* NOTE: open with write mode */
2599         if (fixup_offset) {
2600                 info = open_ctree_fs_info(target, 0, 0, 0,
2601                                           OPEN_CTREE_WRITES |
2602                                           OPEN_CTREE_RESTORE |
2603                                           OPEN_CTREE_PARTIAL);
2604                 if (!info) {
2605                         error("open ctree failed");
2606                         ret = -EIO;
2607                         goto failed_open;
2608                 }
2609         }
2610
2611         cluster = malloc(BLOCK_SIZE);
2612         if (!cluster) {
2613                 error("not enough memory for cluster");
2614                 ret = -ENOMEM;
2615                 goto failed_info;
2616         }
2617
2618         ret = mdrestore_init(&mdrestore, in, out, old_restore, num_threads,
2619                              fixup_offset, info, multi_devices);
2620         if (ret) {
2621                 error("failed to initialize metadata restore state: %d", ret);
2622                 goto failed_cluster;
2623         }
2624
2625         if (!multi_devices && !old_restore) {
2626                 ret = build_chunk_tree(&mdrestore, cluster);
2627                 if (ret)
2628                         goto out;
2629                 if (!list_empty(&mdrestore.overlapping_chunks))
2630                         remap_overlapping_chunks(&mdrestore);
2631         }
2632
2633         if (in != stdin && fseek(in, 0, SEEK_SET)) {
                ret = -errno;
2634                 error("seek failed: %s", strerror(errno));
2635                 goto out;
2636         }
2637
2638         while (!mdrestore.error) {
2639                 ret = fread(cluster, BLOCK_SIZE, 1, in);
2640                 if (!ret)
2641                         break;
2642
2643                 header = &cluster->header;
2644                 if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
2645                     le64_to_cpu(header->bytenr) != bytenr) {
2646                         error("bad header in metadump image");
2647                         ret = -EIO;
2648                         break;
2649                 }
2650                 ret = add_cluster(cluster, &mdrestore, &bytenr);
2651                 if (ret) {
2652                         error("failed to add cluster: %d", ret);
2653                         break;
2654                 }
2655         }
2656         ret = wait_for_worker(&mdrestore);
2657
2658         if (!ret && !multi_devices && !old_restore) {
2659                 struct btrfs_root *root;
2660                 struct stat st;
2661
2662                 root = open_ctree_fd(fileno(out), target, 0,
2663                                           OPEN_CTREE_PARTIAL |
2664                                           OPEN_CTREE_WRITES |
2665                                           OPEN_CTREE_NO_DEVICES);
2666                 if (!root) {
2667                         error("open ctree failed in %s", target);
2668                         ret = -EIO;
2669                         goto out;
2670                 }
2671                 info = root->fs_info;
2672
2673                 if (stat(target, &st)) {
2674                         error("stat %s failed: %s", target, strerror(errno));
2675                         close_ctree(info->chunk_root);
2676                         ret = 1;
2677                         goto out;
2678                 }
2679
2680                 ret = fixup_devices(info, &mdrestore, st.st_size);
2681                 close_ctree(info->chunk_root);
2682                 if (ret)
2683                         goto out;
2684         }
2685 out:
2686         mdrestore_destroy(&mdrestore, num_threads);
2687 failed_cluster:
2688         free(cluster);
2689 failed_info:
2690         if (fixup_offset && info)
2691                 close_ctree(info->chunk_root);
2692 failed_open:
2693         if (in != stdin)
2694                 fclose(in);
2695         return ret;
2696 }
2697
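/*
 * Multi-device restore helper: look up the device item for @cur_devid in
 * the already restored filesystem and write a matching super block (plus
 * backups) to @other_dev, so the extra device is recognized as part of the
 * same filesystem.
 */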
2698 static int update_disk_super_on_device(struct btrfs_fs_info *info,
2699                                        const char *other_dev, u64 cur_devid)
2700 {
2701         struct btrfs_key key;
2702         struct extent_buffer *leaf;
2703         struct btrfs_path path;
2704         struct btrfs_dev_item *dev_item;
2705         struct btrfs_super_block *disk_super;
2706         char dev_uuid[BTRFS_UUID_SIZE];
2707         char fs_uuid[BTRFS_UUID_SIZE];
2708         u64 devid, type, io_align, io_width;
2709         u64 sector_size, total_bytes, bytes_used;
2710         char buf[BTRFS_SUPER_INFO_SIZE];
2711         int fp = -1;
2712         int ret;
2713
2714         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2715         key.type = BTRFS_DEV_ITEM_KEY;
2716         key.offset = cur_devid;
2717
2718         btrfs_init_path(&path);
2719         ret = btrfs_search_slot(NULL, info->chunk_root, &key, &path, 0, 0);
2720         if (ret) {
2721                 error("search key failed: %d", ret);
2722                 ret = -EIO;
2723                 goto out;
2724         }
2725
2726         leaf = path.nodes[0];
2727         dev_item = btrfs_item_ptr(leaf, path.slots[0],
2728                                   struct btrfs_dev_item);
2729
2730         devid = btrfs_device_id(leaf, dev_item);
2731         if (devid != cur_devid) {
2732                 error("devid mismatch: %llu != %llu",
2733                                 (unsigned long long)devid,
2734                                 (unsigned long long)cur_devid);
2735                 ret = -EIO;
2736                 goto out;
2737         }
2738
2739         type = btrfs_device_type(leaf, dev_item);
2740         io_align = btrfs_device_io_align(leaf, dev_item);
2741         io_width = btrfs_device_io_width(leaf, dev_item);
2742         sector_size = btrfs_device_sector_size(leaf, dev_item);
2743         total_bytes = btrfs_device_total_bytes(leaf, dev_item);
2744         bytes_used = btrfs_device_bytes_used(leaf, dev_item);
2745         read_extent_buffer(leaf, dev_uuid,
                            (unsigned long)btrfs_device_uuid(dev_item),
                            BTRFS_UUID_SIZE);
2746         read_extent_buffer(leaf, fs_uuid,
                            (unsigned long)btrfs_device_fsid(dev_item),
                            BTRFS_UUID_SIZE);
2747
2748         btrfs_release_path(&path);
2749
2750         printf("update disk super on %s devid=%llu\n", other_dev, devid);
2751
2752         /* update other devices' super block */
2753         fp = open(other_dev, O_CREAT | O_RDWR, 0600);
2754         if (fp < 0) {
2755                 error("could not open %s: %s", other_dev, strerror(errno));
2756                 ret = -EIO;
2757                 goto out;
2758         }
2759
2760         memcpy(buf, info->super_copy, BTRFS_SUPER_INFO_SIZE);
2761
2762         disk_super = (struct btrfs_super_block *)buf;
2763         dev_item = &disk_super->dev_item;
2764
2765         btrfs_set_stack_device_type(dev_item, type);
2766         btrfs_set_stack_device_id(dev_item, devid);
2767         btrfs_set_stack_device_total_bytes(dev_item, total_bytes);
2768         btrfs_set_stack_device_bytes_used(dev_item, bytes_used);
2769         btrfs_set_stack_device_io_align(dev_item, io_align);
2770         btrfs_set_stack_device_io_width(dev_item, io_width);
2771         btrfs_set_stack_device_sector_size(dev_item, sector_size);
2772         memcpy(dev_item->uuid, dev_uuid, BTRFS_UUID_SIZE);
2773         memcpy(dev_item->fsid, fs_uuid, BTRFS_UUID_SIZE);
2774         csum_block((u8 *)buf, BTRFS_SUPER_INFO_SIZE);
2775
2776         ret = pwrite64(fp, buf, BTRFS_SUPER_INFO_SIZE, BTRFS_SUPER_INFO_OFFSET);
2777         if (ret != BTRFS_SUPER_INFO_SIZE) {
2778                 if (ret < 0)
2779                         error("cannot write superblock: %s", strerror(errno));
2780                 else
2781                         error("short write when writing out superblock");
2782                 ret = -EIO;
2783                 goto out;
2784         }
2785
2786         write_backup_supers(fp, (u8 *)buf);
2787
2788 out:
2789         if (fp != -1)
2790                 close(fp);
2791         return ret;
2792 }
2793
2794 static void print_usage(int ret)
2795 {
2796         printf("usage: btrfs-image [options] source target\n");
2797         printf("\t-r      \trestore metadump image\n");
2798         printf("\t-c value\tcompression level (0 ~ 9)\n");
2799         printf("\t-t value\tnumber of threads (1 ~ 32)\n");
2800         printf("\t-o      \tdon't mess with the chunk tree when restoring\n");
2801         printf("\t-s      \tsanitize file names, use once to replace them with garbage, twice to also generate crc32c collisions\n");
2802         printf("\t-w      \twalk all trees instead of using extent tree, do this if your extent tree is broken\n");
2803         printf("\t-m      \trestore for multiple devices\n");
2804         printf("\n");
2805         printf("\tIn the dump mode, source is the btrfs device and target is the output file (use '-' for stdout).\n");
2806         printf("\tIn the restore mode, source is the dumped image and target is the btrfs device/file.\n");
2807         exit(ret);
2808 }
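/*
 * [Editor's note: usage illustration, not part of the original source.
 * Device and file names are placeholders.]  Typical invocations of the
 * options documented above:
 *
 *   btrfs-image /dev/sda image.dump                 plain metadata dump
 *   btrfs-image -c9 -t4 /dev/sda -                  compressed dump to stdout
 *   btrfs-image -ss /dev/sda image.dump             dump with colliding
 *                                                   sanitized names
 *   btrfs-image -r image.dump /dev/sdb              restore onto one device
 *   btrfs-image -m image.dump /dev/sdb /dev/sdc     multi-device restore
 */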
2809
2810 int main(int argc, char *argv[])
2811 {
2812         char *source;
2813         char *target;
2814         u64 num_threads = 0;
2815         u64 compress_level = 0;
2816         int create = 1;
2817         int old_restore = 0;
2818         int walk_trees = 0;
2819         int multi_devices = 0;
2820         int ret;
2821         enum sanitize_mode sanitize = SANITIZE_NONE;
2822         int dev_cnt = 0;
2823         int usage_error = 0;
2824         FILE *out;
2825
2826         while (1) {
2827                 static const struct option long_options[] = {
2828                         { "help", no_argument, NULL, GETOPT_VAL_HELP},
2829                         { NULL, 0, NULL, 0 }
2830                 };
2831                 int c = getopt_long(argc, argv, "rc:t:oswm", long_options, NULL);
2832                 if (c < 0)
2833                         break;
2834                 switch (c) {
2835                 case 'r':
2836                         create = 0;
2837                         break;
2838                 case 't':
2839                         num_threads = arg_strtou64(optarg);
2840                         if (num_threads > MAX_WORKER_THREADS) {
2841                                 error("number of threads out of range: %llu > %d",
2842                                         (unsigned long long)num_threads,
2843                                         MAX_WORKER_THREADS);
2844                                 return 1;
2845                         }
2846                         break;
2847                 case 'c':
2848                         compress_level = arg_strtou64(optarg);
2849                         if (compress_level > 9) {
2850                                 error("compression level out of range: %llu",
2851                                         (unsigned long long)compress_level);
2852                                 return 1;
2853                         }
2854                         break;
2855                 case 'o':
2856                         old_restore = 1;
2857                         break;
2858                 case 's':
2859                         if (sanitize == SANITIZE_NONE)
2860                                 sanitize = SANITIZE_NAMES;
2861                         else if (sanitize == SANITIZE_NAMES)
2862                                 sanitize = SANITIZE_COLLISIONS;
2863                         break;
2864                 case 'w':
2865                         walk_trees = 1;
2866                         break;
2867                 case 'm':
2868                         create = 0;
2869                         multi_devices = 1;
2870                         break;
2871                 case GETOPT_VAL_HELP:
2872                 default:
2873                         print_usage(c != GETOPT_VAL_HELP);
2874                 }
2875         }
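        /*
         * [Editor's note] Repeated -s flags escalate the sanitize mode
         * selected above:
         *
         *   (default)  SANITIZE_NONE        file names are dumped verbatim
         *   -s         SANITIZE_NAMES       names are replaced with garbage
         *   -ss        SANITIZE_COLLISIONS  replacement names additionally
         *                                   collide with the originals'
         *                                   crc32c name hash
         */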
2876
2877         set_argv0(argv);
2878         if (check_argc_min(argc - optind, 2))
2879                 print_usage(1);
2880
2881         dev_cnt = argc - optind - 1;
2882
2883         if (create) {
2884                 if (old_restore) {
2885                         error(
2886                         "create and restore cannot be used at the same time");
2887                         usage_error++;
2888                 }
2889         } else {
2890                 if (walk_trees || sanitize != SANITIZE_NONE || compress_level) {
2891                         error(
2892                         "using -w, -s, -c options for restore makes no sense");
2893                         usage_error++;
2894                 }
2895                 if (multi_devices && dev_cnt < 2) {
2896                         error("not enough devices specified for -m option");
2897                         usage_error++;
2898                 }
2899                 if (!multi_devices && dev_cnt != 1) {
2900                         error("accepts only 1 device without -m option");
2901                         usage_error++;
2902                 }
2903         }
2904
2905         if (usage_error)
2906                 print_usage(1);
2907
2908         source = argv[optind];
2909         target = argv[optind + 1];
2910
2911         if (create && !strcmp(target, "-")) {
2912                 out = stdout;
2913         } else {
2914                 out = fopen(target, "w+");
2915                 if (!out) {
2916                         error("unable to create target file %s", target);
2917                         exit(1);
2918                 }
2919         }
2920
2921         if (compress_level > 0 || create == 0) {
2922                 if (num_threads == 0) {
2923                         long tmp = sysconf(_SC_NPROCESSORS_ONLN);
2924
2925                         if (tmp <= 0)
2926                                 tmp = 1;
2927                         num_threads = tmp;
2928                 }
2929         } else {
2930                 num_threads = 0;
2931         }
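        /*
         * [Editor's note] Example of the defaulting above: "btrfs-image -c9
         * /dev/sda img" on a 4-CPU machine gets num_threads == 4 worker
         * threads for compression, while a plain uncompressed dump keeps
         * num_threads == 0 and appears to do all work in the main thread.
         */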
2932
2933         if (create) {
2934                 ret = check_mounted(source);
2935                 if (ret < 0) {
2936                         warning("unable to check mount status of: %s",
2937                                         strerror(-ret));
2938                 } else if (ret) {
2939                         warning("%s already mounted, results may be inaccurate",
2940                                         source);
2941                 }
2942
2943                 ret = create_metadump(source, out, num_threads,
2944                                       compress_level, sanitize, walk_trees);
2945         } else {
2946                 ret = restore_metadump(source, out, old_restore, num_threads,
2947                                        0, target, multi_devices);
2948         }
2949         if (ret) {
2950                 error("%s failed: %s", (create) ? "create" : "restore",
2951                        strerror(errno));
2952                 goto out;
2953         }
2954
2955         /* extended support for multiple devices */
2956         if (!create && multi_devices) {
2957                 struct btrfs_fs_info *info;
2958                 u64 total_devs;
2959                 int i;
2960
2961                 info = open_ctree_fs_info(target, 0, 0, 0,
2962                                           OPEN_CTREE_PARTIAL |
2963                                           OPEN_CTREE_RESTORE);
2964                 if (!info) {
2965                         error("open ctree failed at %s", target);
2966                         return 1;
2967                 }
2968
2969                 total_devs = btrfs_super_num_devices(info->super_copy);
2970                 if (total_devs != dev_cnt) {
2971                         error("it needs %llu devices but has only %d",
2972                                 total_devs, dev_cnt);
2973                         close_ctree(info->chunk_root);
2974                         goto out;
2975                 }
2976
2977                 /* update super block on other disks */
2978                 for (i = 2; i <= dev_cnt; i++) {
2979                         ret = update_disk_super_on_device(info,
2980                                         argv[optind + i], (u64)i);
2981                         if (ret) {
2982                                 error("update disk superblock failed devid %d: %d",
2983                                         i, ret);
2984                                 close_ctree(info->chunk_root);
2985                                 exit(1);
2986                         }
2987                 }
2988
2989                 close_ctree(info->chunk_root);
2990
2991                 /* fix metadata block to map correct chunk */
2992                 ret = restore_metadump(source, out, 0, num_threads, 1,
2993                                        target, 1);
2994                 if (ret) {
2995                         error("unable to fixup metadump: %d", ret);
2996                         exit(1);
2997                 }
2998         }
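        /*
         * [Editor's note] Recap of the multi-device path above, as the
         * editor reads it: the first restore_metadump() call lays all
         * metadata onto the first target; update_disk_super_on_device()
         * then stamps each extra target argv[optind + i] with a superblock
         * carrying devid i (i = 2..dev_cnt); the final restore_metadump()
         * call (fifth argument 1, the fixup_offset flag judging by
         * mdrestore_struct) remaps metadata blocks through the now complete
         * chunk tree so each block lands on its own device.
         */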
2999 out:
3000         if (out == stdout) {
3001                 fflush(out);
3002         } else {
3003                 fclose(out);
3004                 if (ret && create) {
3005                         int unlink_ret;
3006
3007                         unlink_ret = unlink(target);
3008                         if (unlink_ret)
3009                                 error("unlink output file %s failed: %s",
3010                                                 target, strerror(errno));
3011                 }
3012         }
3013
3014         btrfs_close_all_devices();
3015
3016         return !!ret;
3017 }