0806f6e4c753b730b4fe277882f2aae0d24e7945
[platform/upstream/btrfs-progs.git] / image / main.c
1 /*
2  * Copyright (C) 2008 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <pthread.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <fcntl.h>
25 #include <unistd.h>
26 #include <dirent.h>
27 #include <zlib.h>
28 #include <getopt.h>
29
30 #include "kerncompat.h"
31 #include "crc32c.h"
32 #include "ctree.h"
33 #include "disk-io.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "volumes.h"
37 #include "extent_io.h"
38 #include "help.h"
39 #include "image/metadump.h"
40 #include "image/sanitize.h"
41
42 #define MAX_WORKER_THREADS      (32)
43
/*
 * One queued unit of dump work: a contiguous run of metadata blocks,
 * optionally compressed by a worker thread, written out in order later.
 */
struct async_work {
        struct list_head list;          /* entry in metadump_struct::list (work queue) */
        struct list_head ordered;       /* entry in metadump_struct::ordered (output order) */
        u64 start;                      /* logical start offset of the run */
        u64 size;                       /* uncompressed size in bytes */
        u8 *buffer;                     /* data; replaced by the compressed copy in dump_worker() */
        size_t bufsize;                 /* current size of @buffer */
        int error;                      /* set when compression failed */
};
53
/*
 * State for creating a metadata image ("dump") of a filesystem.
 */
struct metadump_struct {
        struct btrfs_root *root;        /* filesystem being dumped */
        FILE *out;                      /* destination image stream */

        /* cluster currently being filled, padded to one full block */
        union {
                struct meta_cluster cluster;
                char meta_cluster_bytes[BLOCK_SIZE];
        };

        pthread_t threads[MAX_WORKER_THREADS];
        size_t num_threads;             /* number of started workers */
        pthread_mutex_t mutex;          /* guards list/ordered/counters/error */
        pthread_cond_t cond;            /* signals new work or shutdown */
        struct rb_root name_tree;       /* cache of sanitized names (struct name) */

        struct list_head list;          /* async_work queue consumed by workers */
        struct list_head ordered;       /* async_work in on-disk output order */
        size_t num_items;               /* items queued in the current cluster */
        size_t num_ready;               /* items the workers have finished */

        u64 pending_start;              /* start of the run being batched, (u64)-1 if none */
        u64 pending_size;               /* bytes batched so far */

        int compress_level;             /* zlib level; 0 disables compression */
        int done;                       /* tells workers to exit when queue drains */
        int data;                       /* NOTE(review): looks like "dump data blocks too" -- not used in this chunk, confirm with callers */
        enum sanitize_mode sanitize_names;      /* how file names are anonymized */

        int error;                      /* first error reported by a worker */
};
84
/*
 * State for restoring a metadata image back onto a device or file.
 */
struct mdrestore_struct {
        FILE *in;                       /* image being read (may be stdin) */
        FILE *out;                      /* restore target */

        pthread_t threads[MAX_WORKER_THREADS];
        size_t num_threads;
        pthread_mutex_t mutex;          /* guards the shared lists/flags below */
        pthread_cond_t cond;

        struct rb_root chunk_tree;      /* fs_chunk indexed by logical offset */
        struct rb_root physical_tree;   /* fs_chunk indexed by physical offset */
        struct list_head list;          /* pending work items */
        struct list_head overlapping_chunks;    /* chunks whose physical ranges collide */
        size_t num_items;
        u32 nodesize;                   /* metadata node size of the image */
        u64 devid;
        u64 alloced_chunks;
        u64 last_physical_offset;
        u8 uuid[BTRFS_UUID_SIZE];       /* device uuid */
        u8 fsid[BTRFS_FSID_SIZE];       /* filesystem uuid */

        int compress_method;            /* COMPRESS_* from the image header */
        int done;
        int error;
        int old_restore;                /* image uses the old (pre-chunk-tree) format */
        int fixup_offset;
        int multi_devices;
        int clear_space_cache;
        struct btrfs_fs_info *info;
};
115
116 static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
117                                    u64 search, u64 cluster_bytenr);
118 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size);
119
120 static void csum_block(u8 *buf, size_t len)
121 {
122         u8 result[BTRFS_CRC32_SIZE];
123         u32 crc = ~(u32)0;
124         crc = crc32c(crc, buf + BTRFS_CSUM_SIZE, len - BTRFS_CSUM_SIZE);
125         btrfs_csum_final(crc, result);
126         memcpy(buf, result, BTRFS_CRC32_SIZE);
127 }
128
129 static int has_name(struct btrfs_key *key)
130 {
131         switch (key->type) {
132         case BTRFS_DIR_ITEM_KEY:
133         case BTRFS_DIR_INDEX_KEY:
134         case BTRFS_INODE_REF_KEY:
135         case BTRFS_INODE_EXTREF_KEY:
136         case BTRFS_XATTR_ITEM_KEY:
137                 return 1;
138         default:
139                 break;
140         }
141
142         return 0;
143 }
144
145 static char *generate_garbage(u32 name_len)
146 {
147         char *buf = malloc(name_len);
148         int i;
149
150         if (!buf)
151                 return NULL;
152
153         for (i = 0; i < name_len; i++) {
154                 char c = rand_range(94) + 33;
155
156                 if (c == '/')
157                         c++;
158                 buf[i] = c;
159         }
160
161         return buf;
162 }
163
164 static int name_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
165 {
166         struct name *entry = rb_entry(a, struct name, n);
167         struct name *ins = rb_entry(b, struct name, n);
168         u32 len;
169
170         len = min(ins->len, entry->len);
171         return memcmp(ins->val, entry->val, len);
172 }
173
174 static int chunk_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
175 {
176         struct fs_chunk *entry = rb_entry(a, struct fs_chunk, l);
177         struct fs_chunk *ins = rb_entry(b, struct fs_chunk, l);
178
179         if (fuzz && ins->logical >= entry->logical &&
180             ins->logical < entry->logical + entry->bytes)
181                 return 0;
182
183         if (ins->logical < entry->logical)
184                 return -1;
185         else if (ins->logical > entry->logical)
186                 return 1;
187         return 0;
188 }
189
190 static int physical_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
191 {
192         struct fs_chunk *entry = rb_entry(a, struct fs_chunk, p);
193         struct fs_chunk *ins = rb_entry(b, struct fs_chunk, p);
194
195         if (fuzz && ins->physical >= entry->physical &&
196             ins->physical < entry->physical + entry->bytes)
197                 return 0;
198
199         if (fuzz && entry->physical >= ins->physical &&
200             entry->physical < ins->physical + ins->bytes)
201                 return 0;
202
203         if (ins->physical < entry->physical)
204                 return -1;
205         else if (ins->physical > entry->physical)
206                 return 1;
207         return 0;
208 }
209
210 static void tree_insert(struct rb_root *root, struct rb_node *ins,
211                         int (*cmp)(struct rb_node *a, struct rb_node *b,
212                                    int fuzz))
213 {
214         struct rb_node ** p = &root->rb_node;
215         struct rb_node * parent = NULL;
216         int dir;
217
218         while(*p) {
219                 parent = *p;
220
221                 dir = cmp(*p, ins, 1);
222                 if (dir < 0)
223                         p = &(*p)->rb_left;
224                 else if (dir > 0)
225                         p = &(*p)->rb_right;
226                 else
227                         BUG();
228         }
229
230         rb_link_node(ins, parent, p);
231         rb_insert_color(ins, root);
232 }
233
234 static struct rb_node *tree_search(struct rb_root *root,
235                                    struct rb_node *search,
236                                    int (*cmp)(struct rb_node *a,
237                                               struct rb_node *b, int fuzz),
238                                    int fuzz)
239 {
240         struct rb_node *n = root->rb_node;
241         int dir;
242
243         while (n) {
244                 dir = cmp(n, search, fuzz);
245                 if (dir < 0)
246                         n = n->rb_left;
247                 else if (dir > 0)
248                         n = n->rb_right;
249                 else
250                         return n;
251         }
252
253         return NULL;
254 }
255
256 static u64 logical_to_physical(struct mdrestore_struct *mdres, u64 logical,
257                                u64 *size, u64 *physical_dup)
258 {
259         struct fs_chunk *fs_chunk;
260         struct rb_node *entry;
261         struct fs_chunk search;
262         u64 offset;
263
264         if (logical == BTRFS_SUPER_INFO_OFFSET)
265                 return logical;
266
267         search.logical = logical;
268         entry = tree_search(&mdres->chunk_tree, &search.l, chunk_cmp, 1);
269         if (!entry) {
270                 if (mdres->in != stdin)
271                         warning("cannot find a chunk, using logical");
272                 return logical;
273         }
274         fs_chunk = rb_entry(entry, struct fs_chunk, l);
275         if (fs_chunk->logical > logical || fs_chunk->logical + fs_chunk->bytes < logical)
276                 BUG();
277         offset = search.logical - fs_chunk->logical;
278
279         if (physical_dup) {
280                 /* Only in dup case, physical_dup is not equal to 0 */
281                 if (fs_chunk->physical_dup)
282                         *physical_dup = fs_chunk->physical_dup + offset;
283                 else
284                         *physical_dup = 0;
285         }
286
287         *size = min(*size, fs_chunk->bytes + fs_chunk->logical - logical);
288         return fs_chunk->physical + offset;
289 }
290
/*
 * Reverse CRC-32C table
 *
 * Used by find_collision_calc_suffix() to run the CRC-32C state
 * machine backwards: indexed by the top byte of the CRC state, each
 * entry undoes one table-driven step of the forward computation.
 */
static const u32 crc32c_rev_table[256] = {
        0x00000000L,0x05EC76F1L,0x0BD8EDE2L,0x0E349B13L,
        0x17B1DBC4L,0x125DAD35L,0x1C693626L,0x198540D7L,
        0x2F63B788L,0x2A8FC179L,0x24BB5A6AL,0x21572C9BL,
        0x38D26C4CL,0x3D3E1ABDL,0x330A81AEL,0x36E6F75FL,
        0x5EC76F10L,0x5B2B19E1L,0x551F82F2L,0x50F3F403L,
        0x4976B4D4L,0x4C9AC225L,0x42AE5936L,0x47422FC7L,
        0x71A4D898L,0x7448AE69L,0x7A7C357AL,0x7F90438BL,
        0x6615035CL,0x63F975ADL,0x6DCDEEBEL,0x6821984FL,
        0xBD8EDE20L,0xB862A8D1L,0xB65633C2L,0xB3BA4533L,
        0xAA3F05E4L,0xAFD37315L,0xA1E7E806L,0xA40B9EF7L,
        0x92ED69A8L,0x97011F59L,0x9935844AL,0x9CD9F2BBL,
        0x855CB26CL,0x80B0C49DL,0x8E845F8EL,0x8B68297FL,
        0xE349B130L,0xE6A5C7C1L,0xE8915CD2L,0xED7D2A23L,
        0xF4F86AF4L,0xF1141C05L,0xFF208716L,0xFACCF1E7L,
        0xCC2A06B8L,0xC9C67049L,0xC7F2EB5AL,0xC21E9DABL,
        0xDB9BDD7CL,0xDE77AB8DL,0xD043309EL,0xD5AF466FL,
        0x7EF1CAB1L,0x7B1DBC40L,0x75292753L,0x70C551A2L,
        0x69401175L,0x6CAC6784L,0x6298FC97L,0x67748A66L,
        0x51927D39L,0x547E0BC8L,0x5A4A90DBL,0x5FA6E62AL,
        0x4623A6FDL,0x43CFD00CL,0x4DFB4B1FL,0x48173DEEL,
        0x2036A5A1L,0x25DAD350L,0x2BEE4843L,0x2E023EB2L,
        0x37877E65L,0x326B0894L,0x3C5F9387L,0x39B3E576L,
        0x0F551229L,0x0AB964D8L,0x048DFFCBL,0x0161893AL,
        0x18E4C9EDL,0x1D08BF1CL,0x133C240FL,0x16D052FEL,
        0xC37F1491L,0xC6936260L,0xC8A7F973L,0xCD4B8F82L,
        0xD4CECF55L,0xD122B9A4L,0xDF1622B7L,0xDAFA5446L,
        0xEC1CA319L,0xE9F0D5E8L,0xE7C44EFBL,0xE228380AL,
        0xFBAD78DDL,0xFE410E2CL,0xF075953FL,0xF599E3CEL,
        0x9DB87B81L,0x98540D70L,0x96609663L,0x938CE092L,
        0x8A09A045L,0x8FE5D6B4L,0x81D14DA7L,0x843D3B56L,
        0xB2DBCC09L,0xB737BAF8L,0xB90321EBL,0xBCEF571AL,
        0xA56A17CDL,0xA086613CL,0xAEB2FA2FL,0xAB5E8CDEL,
        0xFDE39562L,0xF80FE393L,0xF63B7880L,0xF3D70E71L,
        0xEA524EA6L,0xEFBE3857L,0xE18AA344L,0xE466D5B5L,
        0xD28022EAL,0xD76C541BL,0xD958CF08L,0xDCB4B9F9L,
        0xC531F92EL,0xC0DD8FDFL,0xCEE914CCL,0xCB05623DL,
        0xA324FA72L,0xA6C88C83L,0xA8FC1790L,0xAD106161L,
        0xB49521B6L,0xB1795747L,0xBF4DCC54L,0xBAA1BAA5L,
        0x8C474DFAL,0x89AB3B0BL,0x879FA018L,0x8273D6E9L,
        0x9BF6963EL,0x9E1AE0CFL,0x902E7BDCL,0x95C20D2DL,
        0x406D4B42L,0x45813DB3L,0x4BB5A6A0L,0x4E59D051L,
        0x57DC9086L,0x5230E677L,0x5C047D64L,0x59E80B95L,
        0x6F0EFCCAL,0x6AE28A3BL,0x64D61128L,0x613A67D9L,
        0x78BF270EL,0x7D5351FFL,0x7367CAECL,0x768BBC1DL,
        0x1EAA2452L,0x1B4652A3L,0x1572C9B0L,0x109EBF41L,
        0x091BFF96L,0x0CF78967L,0x02C31274L,0x072F6485L,
        0x31C993DAL,0x3425E52BL,0x3A117E38L,0x3FFD08C9L,
        0x2678481EL,0x23943EEFL,0x2DA0A5FCL,0x284CD30DL,
        0x83125FD3L,0x86FE2922L,0x88CAB231L,0x8D26C4C0L,
        0x94A38417L,0x914FF2E6L,0x9F7B69F5L,0x9A971F04L,
        0xAC71E85BL,0xA99D9EAAL,0xA7A905B9L,0xA2457348L,
        0xBBC0339FL,0xBE2C456EL,0xB018DE7DL,0xB5F4A88CL,
        0xDDD530C3L,0xD8394632L,0xD60DDD21L,0xD3E1ABD0L,
        0xCA64EB07L,0xCF889DF6L,0xC1BC06E5L,0xC4507014L,
        0xF2B6874BL,0xF75AF1BAL,0xF96E6AA9L,0xFC821C58L,
        0xE5075C8FL,0xE0EB2A7EL,0xEEDFB16DL,0xEB33C79CL,
        0x3E9C81F3L,0x3B70F702L,0x35446C11L,0x30A81AE0L,
        0x292D5A37L,0x2CC12CC6L,0x22F5B7D5L,0x2719C124L,
        0x11FF367BL,0x1413408AL,0x1A27DB99L,0x1FCBAD68L,
        0x064EEDBFL,0x03A29B4EL,0x0D96005DL,0x087A76ACL,
        0x605BEEE3L,0x65B79812L,0x6B830301L,0x6E6F75F0L,
        0x77EA3527L,0x720643D6L,0x7C32D8C5L,0x79DEAE34L,
        0x4F38596BL,0x4AD42F9AL,0x44E0B489L,0x410CC278L,
        0x588982AFL,0x5D65F45EL,0x53516F4DL,0x56BD19BCL
};
360
361 /*
362  * Calculate a 4-byte suffix to match desired CRC32C
363  *
364  * @current_crc: CRC32C checksum of all bytes before the suffix
365  * @desired_crc: the checksum that we want to get after adding the suffix
366  *
367  * Outputs: @suffix: pointer to where the suffix will be written (4-bytes)
368  */
369 static void find_collision_calc_suffix(unsigned long current_crc,
370                                        unsigned long desired_crc,
371                                        char *suffix)
372 {
373         int i;
374
375         for(i = 3; i >= 0; i--) {
376                 desired_crc = (desired_crc << 8)
377                             ^ crc32c_rev_table[desired_crc >> 24 & 0xFF]
378                             ^ ((current_crc >> i * 8) & 0xFF);
379         }
380         for (i = 0; i < 4; i++)
381                 suffix[i] = (desired_crc >> i * 8) & 0xFF;
382 }
383
/*
 * Check if suffix is valid according to our file name conventions
 *
 * All four bytes must be printable ASCII (' '..'~') and none of them
 * may be the path separator '/'.  Returns 1 when valid, 0 otherwise.
 */
static int find_collision_is_suffix_valid(const char *suffix)
{
        int i;

        for (i = 0; i < 4; i++) {
                char ch = suffix[i];

                if (ch < ' ' || ch > 126 || ch == '/')
                        return 0;
        }

        return 1;
}
399
/*
 * Search for a same-length string with the same CRC32C as @val->val and
 * store it in @val->sub.
 *
 * The last 4 bytes of each candidate are derived from the desired
 * checksum via the reverse CRC table; the leading bytes are enumerated
 * like an odometer over the printable range ' '..'~' (skipping '/')
 * until the derived suffix is printable and the candidate differs from
 * the original name.  Returns 1 when a collision was found, 0 if the
 * name is too short or the search space was exhausted.
 */
static int find_collision_reverse_crc32c(struct name *val, u32 name_len)
{
        unsigned long checksum;
        unsigned long current_checksum;
        int found = 0;
        int i;

        /* There are no same length collisions of 4 or less bytes */
        if (name_len <= 4)
                return 0;
        checksum = crc32c(~1, val->val, name_len);
        /* reserve the trailing 4 bytes for the computed suffix */
        name_len -= 4;
        memset(val->sub, ' ', name_len);
        i = 0;
        while (1) {
                current_checksum = crc32c(~1, val->sub, name_len);
                find_collision_calc_suffix(current_checksum,
                                           checksum,
                                           val->sub + name_len);
                /* accept only printable suffixes that differ from the input */
                if (find_collision_is_suffix_valid(val->sub + name_len) &&
                    memcmp(val->sub, val->val, val->len)) {
                        found = 1;
                        break;
                }

                if (val->sub[i] == 126) {
                        /* carry: find the next position not yet at '~' */
                        do {
                                i++;
                                if (i >= name_len)
                                        break;
                        } while (val->sub[i] == 126);

                        if (i >= name_len)
                                break;  /* every position at '~': exhausted */
                        val->sub[i]++;
                        if (val->sub[i] == '/')
                                val->sub[i]++;
                        /* reset all lower positions and restart from them */
                        memset(val->sub, ' ', i);
                        i = 0;
                        continue;
                } else {
                        val->sub[i]++;
                        if (val->sub[i] == '/')
                                val->sub[i]++;
                }
        }
        return found;
}
448
/*
 * Return a same-length sanitized replacement for @name, preferring one
 * with an identical CRC32C (so directory index hashes keep matching).
 * Results are cached in @name_tree, so a given input always maps to
 * the same replacement.
 *
 * Takes ownership of @name: it is either stored in the cache or freed.
 * The returned buffer is owned by the cache and must not be freed by
 * the caller.  Returns NULL on allocation failure.
 */
static char *find_collision(struct rb_root *name_tree, char *name,
                            u32 name_len)
{
        struct name *val;
        struct rb_node *entry;
        struct name tmp;
        int found;
        int i;

        /* did we sanitize this name already? */
        tmp.val = name;
        tmp.len = name_len;
        entry = tree_search(name_tree, &tmp.n, name_cmp, 0);
        if (entry) {
                val = rb_entry(entry, struct name, n);
                free(name);
                return val->sub;
        }

        val = malloc(sizeof(struct name));
        if (!val) {
                error("cannot sanitize name, not enough memory");
                free(name);
                return NULL;
        }

        memset(val, 0, sizeof(*val));

        val->val = name;
        val->len = name_len;
        val->sub = malloc(name_len);
        if (!val->sub) {
                error("cannot sanitize name, not enough memory");
                free(val);
                free(name);
                return NULL;
        }

        found = find_collision_reverse_crc32c(val, name_len);

        if (!found) {
                /* no collision found: fall back to random printable bytes */
                warning(
"cannot find a hash collision for '%.*s', generating garbage, it won't match indexes",
                        val->len, val->val);
                for (i = 0; i < name_len; i++) {
                        char c = rand_range(94) + 33;

                        if (c == '/')
                                c++;
                        val->sub[i] = c;
                }
        }

        tree_insert(name_tree, &val->n, name_cmp);
        return val->sub;
}
504
/*
 * Replace every name stored in a dir item with garbage or a CRC32C
 * collision, depending on the sanitization mode.  One leaf item may
 * pack several btrfs_dir_item structures back to back.
 */
static void sanitize_dir_item(struct metadump_struct *md, struct extent_buffer *eb,
                              int slot)
{
        struct btrfs_dir_item *dir_item;
        char *buf;
        char *garbage;
        unsigned long name_ptr;
        u32 total_len;
        u32 cur = 0;
        u32 this_len;
        u32 name_len;
        /* collision names live in md->name_tree and are freed at teardown */
        int free_garbage = (md->sanitize_names == SANITIZE_NAMES);

        dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
        total_len = btrfs_item_size_nr(eb, slot);
        while (cur < total_len) {
                /* size of this sub-item: header + name + data payload */
                this_len = sizeof(*dir_item) +
                        btrfs_dir_name_len(eb, dir_item) +
                        btrfs_dir_data_len(eb, dir_item);
                name_ptr = (unsigned long)(dir_item + 1);
                name_len = btrfs_dir_name_len(eb, dir_item);

                if (md->sanitize_names == SANITIZE_COLLISIONS) {
                        buf = malloc(name_len);
                        if (!buf) {
                                error("cannot sanitize name, not enough memory");
                                return;
                        }
                        read_extent_buffer(eb, buf, name_ptr, name_len);
                        /* find_collision() takes ownership of buf */
                        garbage = find_collision(&md->name_tree, buf, name_len);
                } else {
                        garbage = generate_garbage(name_len);
                }
                if (!garbage) {
                        error("cannot sanitize name, not enough memory");
                        return;
                }
                write_extent_buffer(eb, garbage, name_ptr, name_len);
                cur += this_len;
                dir_item = (struct btrfs_dir_item *)((char *)dir_item +
                                                     this_len);
                if (free_garbage)
                        free(garbage);
        }
}
550
/*
 * Replace every name in an INODE_REF or INODE_EXTREF item (one item
 * can hold several back references) with garbage or a CRC32C
 * collision.
 *
 * @ext: non-zero selects the extref layout, zero the plain ref layout.
 */
static void sanitize_inode_ref(struct metadump_struct *md,
                               struct extent_buffer *eb, int slot, int ext)
{
        struct btrfs_inode_extref *extref;
        struct btrfs_inode_ref *ref;
        char *garbage, *buf;
        unsigned long ptr;
        unsigned long name_ptr;
        u32 item_size;
        u32 cur_offset = 0;
        int len;
        /* collision names live in md->name_tree and are freed at teardown */
        int free_garbage = (md->sanitize_names == SANITIZE_NAMES);

        item_size = btrfs_item_size_nr(eb, slot);
        ptr = btrfs_item_ptr_offset(eb, slot);
        while (cur_offset < item_size) {
                if (ext) {
                        extref = (struct btrfs_inode_extref *)(ptr +
                                                               cur_offset);
                        name_ptr = (unsigned long)(&extref->name);
                        len = btrfs_inode_extref_name_len(eb, extref);
                        cur_offset += sizeof(*extref);
                } else {
                        ref = (struct btrfs_inode_ref *)(ptr + cur_offset);
                        len = btrfs_inode_ref_name_len(eb, ref);
                        name_ptr = (unsigned long)(ref + 1);
                        cur_offset += sizeof(*ref);
                }
                cur_offset += len;

                if (md->sanitize_names == SANITIZE_COLLISIONS) {
                        buf = malloc(len);
                        if (!buf) {
                                error("cannot sanitize name, not enough memory");
                                return;
                        }
                        read_extent_buffer(eb, buf, name_ptr, len);
                        /* find_collision() takes ownership of buf */
                        garbage = find_collision(&md->name_tree, buf, len);
                } else {
                        garbage = generate_garbage(len);
                }

                if (!garbage) {
                        error("cannot sanitize name, not enough memory");
                        return;
                }
                write_extent_buffer(eb, garbage, name_ptr, len);
                if (free_garbage)
                        free(garbage);
        }
}
602
603 static void sanitize_xattr(struct metadump_struct *md,
604                            struct extent_buffer *eb, int slot)
605 {
606         struct btrfs_dir_item *dir_item;
607         unsigned long data_ptr;
608         u32 data_len;
609
610         dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
611         data_len = btrfs_dir_data_len(eb, dir_item);
612
613         data_ptr = (unsigned long)((char *)(dir_item + 1) +
614                                    btrfs_dir_name_len(eb, dir_item));
615         memset_extent_buffer(eb, 0, data_ptr, data_len);
616 }
617
618 static void sanitize_name(struct metadump_struct *md, u8 *dst,
619                           struct extent_buffer *src, struct btrfs_key *key,
620                           int slot)
621 {
622         struct extent_buffer *eb;
623
624         eb = alloc_dummy_eb(src->start, src->len);
625         if (!eb) {
626                 error("cannot sanitize name, not enough memory");
627                 return;
628         }
629
630         memcpy(eb->data, src->data, src->len);
631
632         switch (key->type) {
633         case BTRFS_DIR_ITEM_KEY:
634         case BTRFS_DIR_INDEX_KEY:
635                 sanitize_dir_item(md, eb, slot);
636                 break;
637         case BTRFS_INODE_REF_KEY:
638                 sanitize_inode_ref(md, eb, slot, 0);
639                 break;
640         case BTRFS_INODE_EXTREF_KEY:
641                 sanitize_inode_ref(md, eb, slot, 1);
642                 break;
643         case BTRFS_XATTR_ITEM_KEY:
644                 sanitize_xattr(md, eb, slot);
645                 break;
646         default:
647                 break;
648         }
649
650         memcpy(dst, eb->data, eb->len);
651         free(eb);
652 }
653
/*
 * zero inline extents and csum items
 *
 * Also dispatches name sanitization for item types that carry file
 * names when sanitization is enabled.  @dst is the copy being written
 * to the image; @src stays untouched.
 */
static void zero_items(struct metadump_struct *md, u8 *dst,
                       struct extent_buffer *src)
{
        struct btrfs_file_extent_item *fi;
        struct btrfs_item *item;
        struct btrfs_key key;
        u32 nritems = btrfs_header_nritems(src);
        size_t size;
        unsigned long ptr;
        int i, extent_type;

        for (i = 0; i < nritems; i++) {
                item = btrfs_item_nr(i);
                btrfs_item_key_to_cpu(src, &key, i);
                /* checksums would leak data fingerprints: wipe them */
                if (key.type == BTRFS_CSUM_ITEM_KEY) {
                        size = btrfs_item_size_nr(src, i);
                        memset(dst + btrfs_leaf_data(src) +
                               btrfs_item_offset_nr(src, i), 0, size);
                        continue;
                }

                if (md->sanitize_names && has_name(&key)) {
                        sanitize_name(md, dst, src, &key, i);
                        continue;
                }

                if (key.type != BTRFS_EXTENT_DATA_KEY)
                        continue;

                /* only inline extents embed file data in the leaf */
                fi = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
                extent_type = btrfs_file_extent_type(src, fi);
                if (extent_type != BTRFS_FILE_EXTENT_INLINE)
                        continue;

                ptr = btrfs_file_extent_inline_start(fi);
                size = btrfs_file_extent_inline_item_len(src, item);
                memset(dst + ptr, 0, size);
        }
}
696
697 /*
698  * copy buffer and zero useless data in the buffer
699  */
700 static void copy_buffer(struct metadump_struct *md, u8 *dst,
701                         struct extent_buffer *src)
702 {
703         int level;
704         size_t size;
705         u32 nritems;
706
707         memcpy(dst, src->data, src->len);
708         if (src->start == BTRFS_SUPER_INFO_OFFSET)
709                 return;
710
711         level = btrfs_header_level(src);
712         nritems = btrfs_header_nritems(src);
713
714         if (nritems == 0) {
715                 size = sizeof(struct btrfs_header);
716                 memset(dst + size, 0, src->len - size);
717         } else if (level == 0) {
718                 size = btrfs_leaf_data(src) +
719                         btrfs_item_offset_nr(src, nritems - 1) -
720                         btrfs_item_nr_offset(nritems);
721                 memset(dst + btrfs_item_nr_offset(nritems), 0, size);
722                 zero_items(md, dst, src);
723         } else {
724                 size = offsetof(struct btrfs_node, ptrs) +
725                         sizeof(struct btrfs_key_ptr) * nritems;
726                 memset(dst + size, 0, src->len - size);
727         }
728         csum_block(dst, src->len);
729 }
730
/*
 * Worker thread body: pull async_work items off md->list and, when a
 * compression level is configured, compress their buffers in place.
 *
 * Exits when md->done is set and the queue is empty, or early (setting
 * md->error once) when the compression buffer cannot be allocated.
 */
static void *dump_worker(void *data)
{
        struct metadump_struct *md = (struct metadump_struct *)data;
        struct async_work *async;
        int ret;

        while (1) {
                pthread_mutex_lock(&md->mutex);
                /* wait for new work or the shutdown flag */
                while (list_empty(&md->list)) {
                        if (md->done) {
                                pthread_mutex_unlock(&md->mutex);
                                goto out;
                        }
                        pthread_cond_wait(&md->cond, &md->mutex);
                }
                async = list_entry(md->list.next, struct async_work, list);
                list_del_init(&async->list);
                pthread_mutex_unlock(&md->mutex);

                if (md->compress_level > 0) {
                        u8 *orig = async->buffer;

                        /* worst-case zlib output size for async->size bytes */
                        async->bufsize = compressBound(async->size);
                        async->buffer = malloc(async->bufsize);
                        if (!async->buffer) {
                                error("not enough memory for async buffer");
                                pthread_mutex_lock(&md->mutex);
                                if (!md->error)
                                        md->error = -ENOMEM;
                                pthread_mutex_unlock(&md->mutex);
                                pthread_exit(NULL);
                        }

                        /*
                         * NOTE(review): casting &bufsize (size_t *) to
                         * unsigned long * assumes the two types share a
                         * representation -- true on common targets, worth
                         * confirming on exotic ones.
                         */
                        ret = compress2(async->buffer,
                                         (unsigned long *)&async->bufsize,
                                         orig, async->size, md->compress_level);

                        if (ret != Z_OK)
                                async->error = 1;

                        free(orig);
                }

                pthread_mutex_lock(&md->mutex);
                md->num_ready++;
                pthread_mutex_unlock(&md->mutex);
        }
out:
        pthread_exit(NULL);
}
781
782 static void meta_cluster_init(struct metadump_struct *md, u64 start)
783 {
784         struct meta_cluster_header *header;
785
786         md->num_items = 0;
787         md->num_ready = 0;
788         header = &md->cluster.header;
789         header->magic = cpu_to_le64(HEADER_MAGIC);
790         header->bytenr = cpu_to_le64(start);
791         header->nritems = cpu_to_le32(0);
792         header->compress = md->compress_level > 0 ?
793                            COMPRESS_ZLIB : COMPRESS_NONE;
794 }
795
/*
 * Stop all worker threads and release the dump state.
 *
 * @num_threads: number of threads to join; may be fewer than the size
 * of md->threads when initialization failed part way through.
 */
static void metadump_destroy(struct metadump_struct *md, int num_threads)
{
        int i;
        struct rb_node *n;

        /* tell the workers to finish and wake them all */
        pthread_mutex_lock(&md->mutex);
        md->done = 1;
        pthread_cond_broadcast(&md->cond);
        pthread_mutex_unlock(&md->mutex);

        for (i = 0; i < num_threads; i++)
                pthread_join(md->threads[i], NULL);

        pthread_cond_destroy(&md->cond);
        pthread_mutex_destroy(&md->mutex);

        /* release the sanitized-name cache */
        while ((n = rb_first(&md->name_tree))) {
                struct name *name;

                name = rb_entry(n, struct name, n);
                rb_erase(n, &md->name_tree);
                free(name->val);
                free(name->sub);
                free(name);
        }
}
822
823 static int metadump_init(struct metadump_struct *md, struct btrfs_root *root,
824                          FILE *out, int num_threads, int compress_level,
825                          enum sanitize_mode sanitize_names)
826 {
827         int i, ret = 0;
828
829         memset(md, 0, sizeof(*md));
830         INIT_LIST_HEAD(&md->list);
831         INIT_LIST_HEAD(&md->ordered);
832         md->root = root;
833         md->out = out;
834         md->pending_start = (u64)-1;
835         md->compress_level = compress_level;
836         md->sanitize_names = sanitize_names;
837         if (sanitize_names == SANITIZE_COLLISIONS)
838                 crc32c_optimization_init();
839
840         md->name_tree.rb_node = NULL;
841         md->num_threads = num_threads;
842         pthread_cond_init(&md->cond, NULL);
843         pthread_mutex_init(&md->mutex, NULL);
844         meta_cluster_init(md, 0);
845
846         if (!num_threads)
847                 return 0;
848
849         for (i = 0; i < num_threads; i++) {
850                 ret = pthread_create(md->threads + i, NULL, dump_worker, md);
851                 if (ret)
852                         break;
853         }
854
855         if (ret)
856                 metadump_destroy(md, i + 1);
857
858         return ret;
859 }
860
861 static int write_zero(FILE *out, size_t size)
862 {
863         static char zero[BLOCK_SIZE];
864         return fwrite(zero, size, 1, out);
865 }
866
/*
 * Flush the current cluster to the output stream: wait for the worker
 * threads to finish compressing every queued buffer, write the cluster
 * header/index block, then each buffer in submission order, and pad the
 * stream up to the next BLOCK_SIZE boundary.
 *
 * Must be called with md->mutex held; the lock is dropped temporarily
 * while polling for the compressors to catch up.
 *
 * @next: set to the stream offset where the next cluster will start.
 *
 * Returns 0 on success or a negative errno on failure.
 */
static int write_buffers(struct metadump_struct *md, u64 *next)
{
	struct meta_cluster_header *header = &md->cluster.header;
	struct meta_cluster_item *item;
	struct async_work *async;
	u64 bytenr = 0;
	u32 nritems = 0;
	int ret;
	int err = 0;

	if (list_empty(&md->ordered))
		goto out;

	/* wait until all buffers are compressed */
	while (!err && md->num_items > md->num_ready) {
		struct timespec ts = {
			.tv_sec = 0,
			.tv_nsec = 10000000,
		};
		/* drop the lock so dump_worker() can make progress */
		pthread_mutex_unlock(&md->mutex);
		nanosleep(&ts, NULL);
		pthread_mutex_lock(&md->mutex);
		err = md->error;
	}

	if (err) {
		error("one of the threads failed: %s", strerror(-err));
		goto out;
	}

	/* setup and write index block */
	list_for_each_entry(async, &md->ordered, ordered) {
		item = &md->cluster.items[nritems];
		item->bytenr = cpu_to_le64(async->start);
		item->size = cpu_to_le32(async->bufsize);
		nritems++;
	}
	header->nritems = cpu_to_le32(nritems);

	ret = fwrite(&md->cluster, BLOCK_SIZE, 1, md->out);
	if (ret != 1) {
		error("unable to write out cluster: %s", strerror(errno));
		return -errno;
	}

	/* write buffers */
	bytenr += le64_to_cpu(header->bytenr) + BLOCK_SIZE;
	while (!list_empty(&md->ordered)) {
		async = list_entry(md->ordered.next, struct async_work,
				   ordered);
		list_del_init(&async->ordered);

		bytenr += async->bufsize;
		/*
		 * After the first write error we stop writing but keep
		 * draining the list so every buffer still gets freed.
		 */
		if (!err)
			ret = fwrite(async->buffer, async->bufsize, 1,
				     md->out);
		if (ret != 1) {
			error("unable to write out cluster: %s",
				strerror(errno));
			err = -errno;
			ret = 0;
		}

		free(async->buffer);
		free(async);
	}

	/* zero unused space in the last block */
	if (!err && bytenr & BLOCK_MASK) {
		size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);

		bytenr += size;
		ret = write_zero(md->out, size);
		if (ret != 1) {
			error("unable to zero out buffer: %s",
				strerror(errno));
			err = -errno;
		}
	}
out:
	*next = bytenr;
	return err;
}
950
/*
 * Read the data extent described by @async (start/size) from disk into
 * async->buffer, retrying with other mirrors until every byte is read.
 *
 * Progress (offset/logical/bytes_left) deliberately carries over between
 * mirror iterations, so a later mirror only reads whatever the earlier
 * ones could not.
 *
 * NOTE(review): cur_mirror starts at 0 here while btrfs mirror numbers
 * are conventionally 1-based — confirm against read_extent_data()'s
 * expectations.
 *
 * Returns 0 when the whole range was read, -EIO otherwise.
 */
static int read_data_extent(struct metadump_struct *md,
			    struct async_work *async)
{
	struct btrfs_root *root = md->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 bytes_left = async->size;
	u64 logical = async->start;
	u64 offset = 0;
	u64 read_len;
	int num_copies;
	int cur_mirror;
	int ret;

	num_copies = btrfs_num_copies(root->fs_info, logical, bytes_left);

	/* Try our best to read data, just like read_tree_block() */
	for (cur_mirror = 0; cur_mirror < num_copies; cur_mirror++) {
		while (bytes_left) {
			/* read_extent_data() trims read_len to what it got */
			read_len = bytes_left;
			ret = read_extent_data(fs_info,
					(char *)(async->buffer + offset),
					logical, &read_len, cur_mirror);
			if (ret < 0)
				break;
			offset += read_len;
			logical += read_len;
			bytes_left -= read_len;
		}
	}
	if (bytes_left)
		return -EIO;
	return 0;
}
984
985 static int get_dev_fd(struct btrfs_root *root)
986 {
987         struct btrfs_device *dev;
988
989         dev = list_first_entry(&root->fs_info->fs_devices->devices,
990                                struct btrfs_device, dev_list);
991         return dev->fd;
992 }
993
994 static int flush_pending(struct metadump_struct *md, int done)
995 {
996         struct async_work *async = NULL;
997         struct extent_buffer *eb;
998         u64 start = 0;
999         u64 size;
1000         size_t offset;
1001         int ret = 0;
1002
1003         if (md->pending_size) {
1004                 async = calloc(1, sizeof(*async));
1005                 if (!async)
1006                         return -ENOMEM;
1007
1008                 async->start = md->pending_start;
1009                 async->size = md->pending_size;
1010                 async->bufsize = async->size;
1011                 async->buffer = malloc(async->bufsize);
1012                 if (!async->buffer) {
1013                         free(async);
1014                         return -ENOMEM;
1015                 }
1016                 offset = 0;
1017                 start = async->start;
1018                 size = async->size;
1019
1020                 if (md->data) {
1021                         ret = read_data_extent(md, async);
1022                         if (ret) {
1023                                 free(async->buffer);
1024                                 free(async);
1025                                 return ret;
1026                         }
1027                 }
1028
1029                 /*
1030                  * Balance can make the mapping not cover the super block, so
1031                  * just copy directly from one of the devices.
1032                  */
1033                 if (start == BTRFS_SUPER_INFO_OFFSET) {
1034                         int fd = get_dev_fd(md->root);
1035
1036                         ret = pread64(fd, async->buffer, size, start);
1037                         if (ret < size) {
1038                                 free(async->buffer);
1039                                 free(async);
1040                                 error("unable to read superblock at %llu: %s",
1041                                                 (unsigned long long)start,
1042                                                 strerror(errno));
1043                                 return -errno;
1044                         }
1045                         size = 0;
1046                         ret = 0;
1047                 }
1048
1049                 while (!md->data && size > 0) {
1050                         u64 this_read = min((u64)md->root->fs_info->nodesize,
1051                                         size);
1052
1053                         eb = read_tree_block(md->root->fs_info, start, 0);
1054                         if (!extent_buffer_uptodate(eb)) {
1055                                 free(async->buffer);
1056                                 free(async);
1057                                 error("unable to read metadata block %llu",
1058                                         (unsigned long long)start);
1059                                 return -EIO;
1060                         }
1061                         copy_buffer(md, async->buffer + offset, eb);
1062                         free_extent_buffer(eb);
1063                         start += this_read;
1064                         offset += this_read;
1065                         size -= this_read;
1066                 }
1067
1068                 md->pending_start = (u64)-1;
1069                 md->pending_size = 0;
1070         } else if (!done) {
1071                 return 0;
1072         }
1073
1074         pthread_mutex_lock(&md->mutex);
1075         if (async) {
1076                 list_add_tail(&async->ordered, &md->ordered);
1077                 md->num_items++;
1078                 if (md->compress_level > 0) {
1079                         list_add_tail(&async->list, &md->list);
1080                         pthread_cond_signal(&md->cond);
1081                 } else {
1082                         md->num_ready++;
1083                 }
1084         }
1085         if (md->num_items >= ITEMS_PER_CLUSTER || done) {
1086                 ret = write_buffers(md, &start);
1087                 if (ret)
1088                         error("unable to write buffers: %s", strerror(-ret));
1089                 else
1090                         meta_cluster_init(md, start);
1091         }
1092         pthread_mutex_unlock(&md->mutex);
1093         return ret;
1094 }
1095
1096 static int add_extent(u64 start, u64 size, struct metadump_struct *md,
1097                       int data)
1098 {
1099         int ret;
1100         if (md->data != data ||
1101             md->pending_size + size > MAX_PENDING_SIZE ||
1102             md->pending_start + md->pending_size != start) {
1103                 ret = flush_pending(md, 0);
1104                 if (ret)
1105                         return ret;
1106                 md->pending_start = start;
1107         }
1108         readahead_tree_block(md->root->fs_info, start, 0);
1109         md->pending_size += size;
1110         md->data = data;
1111         return 0;
1112 }
1113
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Decide whether the v0 extent at @bytenr is a tree block by scanning the
 * following EXTENT_REF_V0 items: a ref owned by a tree objectid (below
 * BTRFS_FIRST_FREE_OBJECTID) marks it as metadata.
 *
 * @path must be positioned at the extent item; its slot is advanced while
 * scanning.
 *
 * Returns 1 for a tree block, 0 for data, negative on search error.
 */
static int is_tree_block(struct btrfs_root *extent_root,
			 struct btrfs_path *path, u64 bytenr)
{
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 ref_objectid;
	int ret;

	leaf = path->nodes[0];
	while (1) {
		struct btrfs_extent_ref_v0 *ref_item;
		path->slots[0]++;
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				return ret;
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		/* ran past the refs of this extent */
		if (key.objectid != bytenr)
			break;
		if (key.type != BTRFS_EXTENT_REF_V0_KEY)
			continue;
		ref_item = btrfs_item_ptr(leaf, path->slots[0],
					  struct btrfs_extent_ref_v0);
		ref_objectid = btrfs_ref_objectid_v0(leaf, ref_item);
		/* refs owned by tree ids mean this is a metadata block */
		if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID)
			return 1;
		break;
	}
	return 0;
}
#endif
1150
/*
 * Recursively add @eb and everything reachable from it to the dump.
 *
 * For a root tree (@root_tree set), leaf items of type ROOT_ITEM are
 * followed into the subtrees they point at; for other trees only internal
 * node pointers are followed.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int copy_tree_blocks(struct btrfs_root *root, struct extent_buffer *eb,
			    struct metadump_struct *metadump, int root_tree)
{
	struct extent_buffer *tmp;
	struct btrfs_root_item *ri;
	struct btrfs_key key;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 bytenr;
	int level;
	int nritems = 0;
	int i = 0;
	int ret;

	ret = add_extent(btrfs_header_bytenr(eb), fs_info->nodesize,
			 metadump, 0);
	if (ret) {
		error("unable to add metadata block %llu: %d",
				btrfs_header_bytenr(eb), ret);
		return ret;
	}

	/* a plain leaf has no children to descend into */
	if (btrfs_header_level(eb) == 0 && !root_tree)
		return 0;

	level = btrfs_header_level(eb);
	nritems = btrfs_header_nritems(eb);
	for (i = 0; i < nritems; i++) {
		if (level == 0) {
			/* root tree leaf: follow each ROOT_ITEM's subtree */
			btrfs_item_key_to_cpu(eb, &key, i);
			if (key.type != BTRFS_ROOT_ITEM_KEY)
				continue;
			ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
			bytenr = btrfs_disk_root_bytenr(eb, ri);
			tmp = read_tree_block(fs_info, bytenr, 0);
			/*
			 * NOTE(review): tmp is not freed on this error path;
			 * if read_tree_block() returned a real buffer this
			 * leaks — verify against its error contract.
			 */
			if (!extent_buffer_uptodate(tmp)) {
				error("unable to read log root block");
				return -EIO;
			}
			ret = copy_tree_blocks(root, tmp, metadump, 0);
			free_extent_buffer(tmp);
			if (ret)
				return ret;
		} else {
			/* internal node: descend into each child block */
			bytenr = btrfs_node_blockptr(eb, i);
			tmp = read_tree_block(fs_info, bytenr, 0);
			if (!extent_buffer_uptodate(tmp)) {
				error("unable to read log root block");
				return -EIO;
			}
			ret = copy_tree_blocks(root, tmp, metadump, root_tree);
			free_extent_buffer(tmp);
			if (ret)
				return ret;
		}
	}

	return 0;
}
1209
1210 static int copy_log_trees(struct btrfs_root *root,
1211                           struct metadump_struct *metadump)
1212 {
1213         u64 blocknr = btrfs_super_log_root(root->fs_info->super_copy);
1214
1215         if (blocknr == 0)
1216                 return 0;
1217
1218         if (!root->fs_info->log_root_tree ||
1219             !root->fs_info->log_root_tree->node) {
1220                 error("unable to copy tree log, it has not been setup");
1221                 return -EIO;
1222         }
1223
1224         return copy_tree_blocks(root, root->fs_info->log_root_tree->node,
1225                                 metadump, 1);
1226 }
1227
/*
 * Add the free space cache data extents to the dump.
 *
 * The v1 space cache is stored as regular file extents of inodes in the
 * tree root, so scan the tree root for EXTENT_DATA items of type REG and
 * add their on-disk ranges as data extents.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int copy_space_cache(struct btrfs_root *root,
			    struct metadump_struct *metadump,
			    struct btrfs_path *path)
{
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 bytenr, num_bytes;
	int ret;

	/* the space cache inodes live in the tree root */
	root = root->fs_info->tree_root;

	key.objectid = 0;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0) {
		error("free space inode not found: %d", ret);
		return ret;
	}

	leaf = path->nodes[0];

	while (1) {
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				error("cannot go to next leaf %d", ret);
				return ret;
			}
			if (ret > 0)
				break;
			/* refresh after btrfs_next_leaf moved the path */
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.type != BTRFS_EXTENT_DATA_KEY) {
			path->slots[0]++;
			continue;
		}

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		/* only regular extents have on-disk data to copy */
		if (btrfs_file_extent_type(leaf, fi) !=
		    BTRFS_FILE_EXTENT_REG) {
			path->slots[0]++;
			continue;
		}

		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
		ret = add_extent(bytenr, num_bytes, metadump, 1);
		if (ret) {
			error("unable to add space cache blocks %d", ret);
			btrfs_release_path(path);
			return ret;
		}
		path->slots[0]++;
	}

	return 0;
}
1291
/*
 * Walk the extent tree and add every tree block extent to the dump.
 *
 * Starts just past the primary super block and considers EXTENT_ITEMs
 * (checking the TREE_BLOCK flag) and METADATA_ITEMs (always metadata,
 * length implied by nodesize).  The v0 fallback inspects backrefs to
 * classify extents that predate extent flags.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int copy_from_extent_tree(struct metadump_struct *metadump,
				 struct btrfs_path *path)
{
	struct btrfs_root *extent_root;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_key key;
	u64 bytenr;
	u64 num_bytes;
	int ret;

	extent_root = metadump->root->fs_info->extent_root;
	/* skip the primary super block itself, it is added separately */
	bytenr = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0) {
		error("extent root not found: %d", ret);
		return ret;
	}
	ret = 0;

	leaf = path->nodes[0];

	while (1) {
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0) {
				error("cannot go to next leaf %d", ret);
				break;
			}
			if (ret > 0) {
				ret = 0;
				break;
			}
			/* refresh after btrfs_next_leaf moved the path */
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid < bytenr ||
		    (key.type != BTRFS_EXTENT_ITEM_KEY &&
		     key.type != BTRFS_METADATA_ITEM_KEY)) {
			path->slots[0]++;
			continue;
		}

		bytenr = key.objectid;
		if (key.type == BTRFS_METADATA_ITEM_KEY) {
			/* METADATA_ITEM's offset is the level, not a length */
			num_bytes = extent_root->fs_info->nodesize;
		} else {
			num_bytes = key.offset;
		}

		if (num_bytes == 0) {
			error("extent length 0 at bytenr %llu key type %d",
					(unsigned long long)bytenr, key.type);
			ret = -EIO;
			break;
		}

		/* items larger than the bare extent item carry inline flags */
		if (btrfs_item_size_nr(leaf, path->slots[0]) > sizeof(*ei)) {
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_extent_item);
			if (btrfs_extent_flags(leaf, ei) &
			    BTRFS_EXTENT_FLAG_TREE_BLOCK) {
				ret = add_extent(bytenr, num_bytes, metadump,
						 0);
				if (ret) {
					error("unable to add block %llu: %d",
						(unsigned long long)bytenr, ret);
					break;
				}
			}
		} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
			ret = is_tree_block(extent_root, path, bytenr);
			if (ret < 0) {
				error("failed to check tree block %llu: %d",
					(unsigned long long)bytenr, ret);
				break;
			}

			if (ret) {
				ret = add_extent(bytenr, num_bytes, metadump,
						 0);
				if (ret) {
					error("unable to add block %llu: %d",
						(unsigned long long)bytenr, ret);
					break;
				}
			}
			ret = 0;
#else
			error(
	"either extent tree is corrupted or you haven't built with V0 support");
			ret = -EIO;
			break;
#endif
		}
		bytenr += num_bytes;
	}

	btrfs_release_path(path);

	return ret;
}
1400
1401 static int create_metadump(const char *input, FILE *out, int num_threads,
1402                            int compress_level, enum sanitize_mode sanitize,
1403                            int walk_trees)
1404 {
1405         struct btrfs_root *root;
1406         struct btrfs_path path;
1407         struct metadump_struct metadump;
1408         int ret;
1409         int err = 0;
1410
1411         root = open_ctree(input, 0, 0);
1412         if (!root) {
1413                 error("open ctree failed");
1414                 return -EIO;
1415         }
1416
1417         ret = metadump_init(&metadump, root, out, num_threads,
1418                             compress_level, sanitize);
1419         if (ret) {
1420                 error("failed to initialize metadump: %d", ret);
1421                 close_ctree(root);
1422                 return ret;
1423         }
1424
1425         ret = add_extent(BTRFS_SUPER_INFO_OFFSET, BTRFS_SUPER_INFO_SIZE,
1426                         &metadump, 0);
1427         if (ret) {
1428                 error("unable to add metadata: %d", ret);
1429                 err = ret;
1430                 goto out;
1431         }
1432
1433         btrfs_init_path(&path);
1434
1435         if (walk_trees) {
1436                 ret = copy_tree_blocks(root, root->fs_info->chunk_root->node,
1437                                        &metadump, 1);
1438                 if (ret) {
1439                         err = ret;
1440                         goto out;
1441                 }
1442
1443                 ret = copy_tree_blocks(root, root->fs_info->tree_root->node,
1444                                        &metadump, 1);
1445                 if (ret) {
1446                         err = ret;
1447                         goto out;
1448                 }
1449         } else {
1450                 ret = copy_from_extent_tree(&metadump, &path);
1451                 if (ret) {
1452                         err = ret;
1453                         goto out;
1454                 }
1455         }
1456
1457         ret = copy_log_trees(root, &metadump);
1458         if (ret) {
1459                 err = ret;
1460                 goto out;
1461         }
1462
1463         ret = copy_space_cache(root, &metadump, &path);
1464 out:
1465         ret = flush_pending(&metadump, 1);
1466         if (ret) {
1467                 if (!err)
1468                         err = ret;
1469                 error("failed to flush pending data: %d", ret);
1470         }
1471
1472         metadump_destroy(&metadump, num_threads);
1473
1474         btrfs_release_path(&path);
1475         ret = close_ctree(root);
1476         return err ? err : ret;
1477 }
1478
/*
 * Rewrite the super block in @buffer for an old-format (v1) restored
 * image: set the METADUMP flag and replace the sys chunk array with a
 * single SYSTEM chunk of one stripe covering the whole disk ((u64)-1),
 * then recompute the checksum.
 */
static void update_super_old(u8 *buffer)
{
	struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
	struct btrfs_chunk *chunk;
	struct btrfs_disk_key *key;
	u32 sectorsize = btrfs_super_sectorsize(super);
	u64 flags = btrfs_super_flags(super);

	flags |= BTRFS_SUPER_FLAG_METADUMP;
	btrfs_set_super_flags(super, flags);

	/* the sys array holds a disk key immediately followed by a chunk */
	key = (struct btrfs_disk_key *)(super->sys_chunk_array);
	chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
				       sizeof(struct btrfs_disk_key));

	btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
	btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
	btrfs_set_disk_key_offset(key, 0);

	/* one chunk mapping logical 0..-1, so any block resolves 1:1 */
	btrfs_set_stack_chunk_length(chunk, (u64)-1);
	btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
	btrfs_set_stack_chunk_stripe_len(chunk, BTRFS_STRIPE_LEN);
	btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
	btrfs_set_stack_chunk_io_align(chunk, sectorsize);
	btrfs_set_stack_chunk_io_width(chunk, sectorsize);
	btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
	btrfs_set_stack_chunk_num_stripes(chunk, 1);
	btrfs_set_stack_chunk_sub_stripes(chunk, 0);
	/* devid is already little-endian on disk, copy it verbatim */
	chunk->stripe.devid = super->dev_item.devid;
	btrfs_set_stack_stripe_offset(&chunk->stripe, 0);
	memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
	btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
	csum_block(buffer, BTRFS_SUPER_INFO_SIZE);
}
1513
/*
 * Rewrite the super block in @buffer for a restored (v2) image: compact
 * the sys chunk array in place so every chunk targets the single restore
 * device, remap stripe offsets via logical_to_physical(), set the
 * METADUMP_V2 flag and num_devices to 1, and recompute the checksum.
 *
 * The array only ever shrinks, so write_ptr trails ptr and memmove is
 * safe for the overlapping copies.
 *
 * Returns 0 on success, -EIO on a malformed sys array.
 */
static int update_super(struct mdrestore_struct *mdres, u8 *buffer)
{
	struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
	struct btrfs_chunk *chunk;
	struct btrfs_disk_key *disk_key;
	struct btrfs_key key;
	u64 flags = btrfs_super_flags(super);
	u32 new_array_size = 0;
	u32 array_size;
	u32 cur = 0;
	u8 *ptr, *write_ptr;
	int old_num_stripes;

	write_ptr = ptr = super->sys_chunk_array;
	array_size = btrfs_super_sys_array_size(super);

	while (cur < array_size) {
		disk_key = (struct btrfs_disk_key *)ptr;
		btrfs_disk_key_to_cpu(&key, disk_key);

		new_array_size += sizeof(*disk_key);
		memmove(write_ptr, ptr, sizeof(*disk_key));

		write_ptr += sizeof(*disk_key);
		ptr += sizeof(*disk_key);
		cur += sizeof(*disk_key);

		if (key.type == BTRFS_CHUNK_ITEM_KEY) {
			u64 type, physical, physical_dup, size = 0;

			/* read num_stripes from the source before copying */
			chunk = (struct btrfs_chunk *)ptr;
			old_num_stripes = btrfs_stack_chunk_num_stripes(chunk);
			chunk = (struct btrfs_chunk *)write_ptr;

			/* sizeof(*chunk) covers the chunk plus one stripe */
			memmove(write_ptr, ptr, sizeof(*chunk));
			btrfs_set_stack_chunk_sub_stripes(chunk, 0);
			type = btrfs_stack_chunk_type(chunk);
			if (type & BTRFS_BLOCK_GROUP_DUP) {
				/*
				 * NOTE(review): for DUP, space for a second
				 * stripe is kept but its contents are not
				 * copied or remapped here — verify.
				 */
				new_array_size += sizeof(struct btrfs_stripe);
				write_ptr += sizeof(struct btrfs_stripe);
			} else {
				btrfs_set_stack_chunk_num_stripes(chunk, 1);
				btrfs_set_stack_chunk_type(chunk,
						BTRFS_BLOCK_GROUP_SYSTEM);
			}
			chunk->stripe.devid = super->dev_item.devid;
			physical = logical_to_physical(mdres, key.offset,
						       &size, &physical_dup);
			/* size == -1 means the logical address is unmapped */
			if (size != (u64)-1)
				btrfs_set_stack_stripe_offset(&chunk->stripe,
							      physical);
			memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid,
			       BTRFS_UUID_SIZE);
			new_array_size += sizeof(*chunk);
		} else {
			error("bogus key in the sys array %d", key.type);
			return -EIO;
		}
		/* advance source by the original (multi-stripe) item size */
		write_ptr += sizeof(*chunk);
		ptr += btrfs_chunk_item_size(old_num_stripes);
		cur += btrfs_chunk_item_size(old_num_stripes);
	}

	if (mdres->clear_space_cache)
		btrfs_set_super_cache_generation(super, 0);

	flags |= BTRFS_SUPER_FLAG_METADUMP_V2;
	btrfs_set_super_flags(super, flags);
	btrfs_set_super_sys_array_size(super, new_array_size);
	btrfs_set_super_num_devices(super, 1);
	csum_block(buffer, BTRFS_SUPER_INFO_SIZE);

	return 0;
}
1588
1589 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size)
1590 {
1591         struct extent_buffer *eb;
1592
1593         eb = calloc(1, sizeof(struct extent_buffer) + size);
1594         if (!eb)
1595                 return NULL;
1596
1597         eb->start = bytenr;
1598         eb->len = size;
1599         return eb;
1600 }
1601
/*
 * Shrink the item at @slot in leaf @eb to @new_size, mimicking
 * btrfs_truncate_item(): the data of items from @slot to the end of the
 * leaf is shifted toward the leaf end by the freed amount and their
 * offsets are bumped accordingly.
 *
 * No-op when the item already has @new_size.
 */
static void truncate_item(struct extent_buffer *eb, int slot, u32 new_size)
{
	struct btrfs_item *item;
	u32 nritems;
	u32 old_size;
	u32 old_data_start;
	u32 size_diff;
	u32 data_end;
	int i;

	old_size = btrfs_item_size_nr(eb, slot);
	if (old_size == new_size)
		return;

	nritems = btrfs_header_nritems(eb);
	/* item data grows downward; the last item starts lowest */
	data_end = btrfs_item_offset_nr(eb, nritems - 1);

	old_data_start = btrfs_item_offset_nr(eb, slot);
	size_diff = old_size - new_size;

	/* shift the offsets of all items from @slot onward */
	for (i = slot; i < nritems; i++) {
		u32 ioff;
		item = btrfs_item_nr(i);
		ioff = btrfs_item_offset(eb, item);
		btrfs_set_item_offset(eb, item, ioff + size_diff);
	}

	/* move the data block [data_end, old_data_start + new_size) up */
	memmove_extent_buffer(eb, btrfs_leaf_data(eb) + data_end + size_diff,
			      btrfs_leaf_data(eb) + data_end,
			      old_data_start + new_size - data_end);
	item = btrfs_item_nr(slot);
	btrfs_set_item_size(eb, item, new_size);
}
1635
/*
 * Rewrite every chunk-tree leaf found in the restored block at
 * @async->start so it matches the single-device layout of the image:
 * chunk items are truncated to one stripe (two for DUP chunks), the
 * RAID profile bits of the chunk type are cleared, the stripe
 * devid/offset/uuid are pointed at the target device, and the block
 * checksum is recomputed.
 *
 * @buffer holds the already-decompressed data and may span several tree
 * blocks of mdres->nodesize each.  Blocks that do not parse as our
 * metadata are skipped.  Returns 0 on success (including the nothing-
 * to-do cases) or -ENOMEM.
 */
static int fixup_chunk_tree_block(struct mdrestore_struct *mdres,
				  struct async_work *async, u8 *buffer,
				  size_t size)
{
	struct extent_buffer *eb;
	size_t size_left = size;
	u64 bytenr = async->start;
	int i;

	/* Not a multiple of the node size: not tree metadata, nothing to do */
	if (size_left % mdres->nodesize)
		return 0;

	eb = alloc_dummy_eb(bytenr, mdres->nodesize);
	if (!eb)
		return -ENOMEM;

	while (size_left) {
		eb->start = bytenr;
		memcpy(eb->data, buffer, mdres->nodesize);

		/* Stop on anything that doesn't look like one of our blocks */
		if (btrfs_header_bytenr(eb) != bytenr)
			break;
		if (memcmp(mdres->fsid,
			   eb->data + offsetof(struct btrfs_header, fsid),
			   BTRFS_FSID_SIZE))
			break;

		/* Only chunk-tree leaves carry chunk items to fix up */
		if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID)
			goto next;

		if (btrfs_header_level(eb) != 0)
			goto next;

		for (i = 0; i < btrfs_header_nritems(eb); i++) {
			struct btrfs_chunk *chunk;
			struct btrfs_key key;
			u64 type, physical, physical_dup, size = (u64)-1;

			btrfs_item_key_to_cpu(eb, &key, i);
			if (key.type != BTRFS_CHUNK_ITEM_KEY)
				continue;

			size = 0;
			physical = logical_to_physical(mdres, key.offset,
						       &size, &physical_dup);

			/* Drop all stripes but the first; DUP keeps two */
			if (!physical_dup)
				truncate_item(eb, i, sizeof(*chunk));
			chunk = btrfs_item_ptr(eb, i, struct btrfs_chunk);


			/* Zero out the RAID profile */
			type = btrfs_chunk_type(eb, chunk);
			type &= (BTRFS_BLOCK_GROUP_DATA |
				 BTRFS_BLOCK_GROUP_SYSTEM |
				 BTRFS_BLOCK_GROUP_METADATA |
				 BTRFS_BLOCK_GROUP_DUP);
			btrfs_set_chunk_type(eb, chunk, type);

			if (!physical_dup)
				btrfs_set_chunk_num_stripes(eb, chunk, 1);
			btrfs_set_chunk_sub_stripes(eb, chunk, 0);
			btrfs_set_stripe_devid_nr(eb, chunk, 0, mdres->devid);
			/* size == -1 means no mapping was found for the chunk */
			if (size != (u64)-1)
				btrfs_set_stripe_offset_nr(eb, chunk, 0,
							   physical);
			/* update stripe 2 offset */
			if (physical_dup)
				btrfs_set_stripe_offset_nr(eb, chunk, 1,
							   physical_dup);

			write_extent_buffer(eb, mdres->uuid,
					(unsigned long)btrfs_stripe_dev_uuid_nr(
						chunk, 0),
					BTRFS_UUID_SIZE);
		}
		/* Copy the fixed-up block back and refresh its checksum */
		memcpy(buffer, eb->data, eb->len);
		csum_block(buffer, eb->len);
next:
		size_left -= mdres->nodesize;
		buffer += mdres->nodesize;
		bytenr += mdres->nodesize;
	}

	free(eb);
	return 0;
}
1723
1724 static void write_backup_supers(int fd, u8 *buf)
1725 {
1726         struct btrfs_super_block *super = (struct btrfs_super_block *)buf;
1727         struct stat st;
1728         u64 size;
1729         u64 bytenr;
1730         int i;
1731         int ret;
1732
1733         if (fstat(fd, &st)) {
1734                 error(
1735         "cannot stat restore point, won't be able to write backup supers: %s",
1736                         strerror(errno));
1737                 return;
1738         }
1739
1740         size = btrfs_device_size(fd, &st);
1741
1742         for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1743                 bytenr = btrfs_sb_offset(i);
1744                 if (bytenr + BTRFS_SUPER_INFO_SIZE > size)
1745                         break;
1746                 btrfs_set_super_bytenr(super, bytenr);
1747                 csum_block(buf, BTRFS_SUPER_INFO_SIZE);
1748                 ret = pwrite64(fd, buf, BTRFS_SUPER_INFO_SIZE, bytenr);
1749                 if (ret < BTRFS_SUPER_INFO_SIZE) {
1750                         if (ret < 0)
1751                                 error(
1752                                 "problem writing out backup super block %d: %s",
1753                                                 i, strerror(errno));
1754                         else
1755                                 error("short write writing out backup super block");
1756                         break;
1757                 }
1758         }
1759 }
1760
/*
 * Worker thread body for restore: pulls async_work entries off
 * mdres->list, optionally decompresses them, fixes up super and chunk
 * tree blocks, and writes the data out at the proper physical offsets.
 * Runs until mdres->done is set with an empty queue.
 *
 * Errors are recorded in mdres->error (first error wins) and the loop
 * continues, except the fixup_offset write path, which exits the
 * program outright.
 */
static void *restore_worker(void *data)
{
	struct mdrestore_struct *mdres = (struct mdrestore_struct *)data;
	struct async_work *async;
	size_t size;
	u8 *buffer;
	u8 *outbuf;
	int outfd;
	int ret;
	int compress_size = MAX_PENDING_SIZE * 4;

	outfd = fileno(mdres->out);
	buffer = malloc(compress_size);
	if (!buffer) {
		error("not enough memory for restore worker buffer");
		pthread_mutex_lock(&mdres->mutex);
		if (!mdres->error)
			mdres->error = -ENOMEM;
		pthread_mutex_unlock(&mdres->mutex);
		pthread_exit(NULL);
	}

	while (1) {
		u64 bytenr, physical_dup;
		off_t offset = 0;
		int err = 0;

		pthread_mutex_lock(&mdres->mutex);
		/* Wait until restore state is initialized and work arrives */
		while (!mdres->nodesize || list_empty(&mdres->list)) {
			if (mdres->done) {
				pthread_mutex_unlock(&mdres->mutex);
				goto out;
			}
			pthread_cond_wait(&mdres->cond, &mdres->mutex);
		}
		async = list_entry(mdres->list.next, struct async_work, list);
		list_del_init(&async->list);

		/*
		 * The mutex stays held for the rest of the iteration; it is
		 * only dropped around the decompression call below.
		 */
		if (mdres->compress_method == COMPRESS_ZLIB) {
			size = compress_size;
			pthread_mutex_unlock(&mdres->mutex);
			ret = uncompress(buffer, (unsigned long *)&size,
					 async->buffer, async->bufsize);
			pthread_mutex_lock(&mdres->mutex);
			if (ret != Z_OK) {
				error("decompression failed with %d", ret);
				err = -EIO;
			}
			outbuf = buffer;
		} else {
			outbuf = async->buffer;
			size = async->bufsize;
		}

		/* Single-device restore rewrites supers and chunk blocks */
		if (!mdres->multi_devices) {
			if (async->start == BTRFS_SUPER_INFO_OFFSET) {
				if (mdres->old_restore) {
					update_super_old(outbuf);
				} else {
					ret = update_super(mdres, outbuf);
					if (ret)
						err = ret;
				}
			} else if (!mdres->old_restore) {
				ret = fixup_chunk_tree_block(mdres, async, outbuf, size);
				if (ret)
					err = ret;
			}
		}

		if (!mdres->fixup_offset) {
			/* Write each piece at its mapped physical offset */
			while (size) {
				u64 chunk_size = size;
				physical_dup = 0;
				if (!mdres->multi_devices && !mdres->old_restore)
					bytenr = logical_to_physical(mdres,
						     async->start + offset,
						     &chunk_size,
						     &physical_dup);
				else
					bytenr = async->start + offset;

				ret = pwrite64(outfd, outbuf+offset, chunk_size,
					       bytenr);
				if (ret != chunk_size)
					goto error;

				/* DUP chunks get a second identical copy */
				if (physical_dup)
					ret = pwrite64(outfd, outbuf+offset,
						       chunk_size,
						       physical_dup);
				if (ret != chunk_size)
					goto error;

				size -= chunk_size;
				offset += chunk_size;
				continue;

error:
				if (ret < 0) {
					error("unable to write to device: %s",
							strerror(errno));
					/*
					 * NOTE(review): this stores a positive
					 * errno while every other failure path
					 * here uses a negative code -- confirm
					 * callers only test for non-zero.
					 */
					err = errno;
				} else {
					error("short write");
					err = -EIO;
				}
			}
		} else if (async->start != BTRFS_SUPER_INFO_OFFSET) {
			ret = write_data_to_disk(mdres->info, outbuf, async->start, size, 0);
			if (ret) {
				error("failed to write data");
				exit(1);
			}
		}


		/* backup super blocks are already there at fixup_offset stage */
		if (!mdres->multi_devices && async->start == BTRFS_SUPER_INFO_OFFSET)
			write_backup_supers(outfd, outbuf);

		if (err && !mdres->error)
			mdres->error = err;
		mdres->num_items--;
		pthread_mutex_unlock(&mdres->mutex);

		free(async->buffer);
		free(async);
	}
out:
	free(buffer);
	pthread_exit(NULL);
}
1894
1895 static void mdrestore_destroy(struct mdrestore_struct *mdres, int num_threads)
1896 {
1897         struct rb_node *n;
1898         int i;
1899
1900         while ((n = rb_first(&mdres->chunk_tree))) {
1901                 struct fs_chunk *entry;
1902
1903                 entry = rb_entry(n, struct fs_chunk, l);
1904                 rb_erase(n, &mdres->chunk_tree);
1905                 rb_erase(&entry->p, &mdres->physical_tree);
1906                 free(entry);
1907         }
1908         pthread_mutex_lock(&mdres->mutex);
1909         mdres->done = 1;
1910         pthread_cond_broadcast(&mdres->cond);
1911         pthread_mutex_unlock(&mdres->mutex);
1912
1913         for (i = 0; i < num_threads; i++)
1914                 pthread_join(mdres->threads[i], NULL);
1915
1916         pthread_cond_destroy(&mdres->cond);
1917         pthread_mutex_destroy(&mdres->mutex);
1918 }
1919
1920 static int mdrestore_init(struct mdrestore_struct *mdres,
1921                           FILE *in, FILE *out, int old_restore,
1922                           int num_threads, int fixup_offset,
1923                           struct btrfs_fs_info *info, int multi_devices)
1924 {
1925         int i, ret = 0;
1926
1927         memset(mdres, 0, sizeof(*mdres));
1928         pthread_cond_init(&mdres->cond, NULL);
1929         pthread_mutex_init(&mdres->mutex, NULL);
1930         INIT_LIST_HEAD(&mdres->list);
1931         INIT_LIST_HEAD(&mdres->overlapping_chunks);
1932         mdres->in = in;
1933         mdres->out = out;
1934         mdres->old_restore = old_restore;
1935         mdres->chunk_tree.rb_node = NULL;
1936         mdres->fixup_offset = fixup_offset;
1937         mdres->info = info;
1938         mdres->multi_devices = multi_devices;
1939         mdres->clear_space_cache = 0;
1940         mdres->last_physical_offset = 0;
1941         mdres->alloced_chunks = 0;
1942
1943         if (!num_threads)
1944                 return 0;
1945
1946         mdres->num_threads = num_threads;
1947         for (i = 0; i < num_threads; i++) {
1948                 ret = pthread_create(&mdres->threads[i], NULL, restore_worker,
1949                                      mdres);
1950                 if (ret) {
1951                         /* pthread_create returns errno directly */
1952                         ret = -ret;
1953                         break;
1954                 }
1955         }
1956         if (ret)
1957                 mdrestore_destroy(mdres, i + 1);
1958         return ret;
1959 }
1960
/*
 * Initialize restore state (nodesize, fsid, device uuid/devid) from the
 * super block carried in @async, decompressing it first if needed.
 *
 * A no-op once mdres->nodesize is set.  The caller (add_cluster) holds
 * mdres->mutex across this call.  Returns 0 on success, -ENOMEM or -EIO
 * on failure.
 */
static int fill_mdres_info(struct mdrestore_struct *mdres,
			   struct async_work *async)
{
	struct btrfs_super_block *super;
	u8 *buffer = NULL;
	u8 *outbuf;
	int ret;

	/* We've already been initialized */
	if (mdres->nodesize)
		return 0;

	if (mdres->compress_method == COMPRESS_ZLIB) {
		/*
		 * uncompress() takes the destination length as uLongf
		 * (unsigned long), so use a matching variable instead of
		 * casting a size_t pointer, which has a different width on
		 * some ABIs.
		 */
		unsigned long size = MAX_PENDING_SIZE * 2;

		buffer = malloc(MAX_PENDING_SIZE * 2);
		if (!buffer)
			return -ENOMEM;
		ret = uncompress(buffer, &size,
				 async->buffer, async->bufsize);
		if (ret != Z_OK) {
			error("decompression failed with %d", ret);
			free(buffer);
			return -EIO;
		}
		outbuf = buffer;
	} else {
		outbuf = async->buffer;
	}

	super = (struct btrfs_super_block *)outbuf;
	mdres->nodesize = btrfs_super_nodesize(super);
	memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
	memcpy(mdres->uuid, super->dev_item.uuid,
		       BTRFS_UUID_SIZE);
	mdres->devid = le64_to_cpu(super->dev_item.devid);
	free(buffer);
	return 0;
}
2000
/*
 * Read the payload of one on-disk cluster from mdres->in and queue each
 * item as an async_work entry for the worker threads.
 *
 * @cluster's header has already been read by the caller.  The first
 * super-block item encountered initializes the restore state via
 * fill_mdres_info().  Trailing padding up to the next BLOCK_SIZE
 * boundary is consumed, and the file offset of the following cluster is
 * returned through @next.  Returns 0 or a negative errno.
 */
static int add_cluster(struct meta_cluster *cluster,
		       struct mdrestore_struct *mdres, u64 *next)
{
	struct meta_cluster_item *item;
	struct meta_cluster_header *header = &cluster->header;
	struct async_work *async;
	u64 bytenr;
	u32 i, nritems;
	int ret;

	pthread_mutex_lock(&mdres->mutex);
	mdres->compress_method = header->compress;
	pthread_mutex_unlock(&mdres->mutex);

	/* Item payloads follow the BLOCK_SIZE-sized cluster header */
	bytenr = le64_to_cpu(header->bytenr) + BLOCK_SIZE;
	nritems = le32_to_cpu(header->nritems);
	for (i = 0; i < nritems; i++) {
		item = &cluster->items[i];
		async = calloc(1, sizeof(*async));
		if (!async) {
			error("not enough memory for async data");
			return -ENOMEM;
		}
		async->start = le64_to_cpu(item->bytenr);
		async->bufsize = le32_to_cpu(item->size);
		async->buffer = malloc(async->bufsize);
		if (!async->buffer) {
			error("not enough memory for async buffer");
			free(async);
			return -ENOMEM;
		}
		ret = fread(async->buffer, async->bufsize, 1, mdres->in);
		if (ret != 1) {
			error("unable to read buffer: %s", strerror(errno));
			free(async->buffer);
			free(async);
			return -EIO;
		}
		bytenr += async->bufsize;

		pthread_mutex_lock(&mdres->mutex);
		if (async->start == BTRFS_SUPER_INFO_OFFSET) {
			/* Must run under the mutex before workers see it */
			ret = fill_mdres_info(mdres, async);
			if (ret) {
				error("unable to set up restore state");
				pthread_mutex_unlock(&mdres->mutex);
				free(async->buffer);
				free(async);
				return ret;
			}
		}
		/* Hand the item to the workers and wake one up */
		list_add_tail(&async->list, &mdres->list);
		mdres->num_items++;
		pthread_cond_signal(&mdres->cond);
		pthread_mutex_unlock(&mdres->mutex);
	}
	/* Consume padding up to the next BLOCK_SIZE boundary */
	if (bytenr & BLOCK_MASK) {
		char buffer[BLOCK_MASK];
		size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);

		bytenr += size;
		ret = fread(buffer, size, 1, mdres->in);
		if (ret != 1) {
			error("failed to read buffer: %s", strerror(errno));
			return -EIO;
		}
	}
	*next = bytenr;
	return 0;
}
2071
2072 static int wait_for_worker(struct mdrestore_struct *mdres)
2073 {
2074         int ret = 0;
2075
2076         pthread_mutex_lock(&mdres->mutex);
2077         ret = mdres->error;
2078         while (!ret && mdres->num_items > 0) {
2079                 struct timespec ts = {
2080                         .tv_sec = 0,
2081                         .tv_nsec = 10000000,
2082                 };
2083                 pthread_mutex_unlock(&mdres->mutex);
2084                 nanosleep(&ts, NULL);
2085                 pthread_mutex_lock(&mdres->mutex);
2086                 ret = mdres->error;
2087         }
2088         pthread_mutex_unlock(&mdres->mutex);
2089         return ret;
2090 }
2091
/*
 * Parse one chunk-tree block held in @buffer and record its chunk
 * mappings in mdres.
 *
 * @buffer points at the start of the metadump item containing the
 * block, @item_bytenr is that item's logical start, and @bytenr is the
 * logical address of the block to parse, so the prologue steps forward
 * node by node to the right offset within the item.  (@bufsize is
 * currently unused.)  Internal nodes recurse via
 * search_for_chunk_blocks(); leaves add an fs_chunk (logical, physical,
 * length, and second stripe for DUP chunks) to the chunk and physical
 * trees and update the space accounting.  Returns 0 or a negative
 * errno.
 */
static int read_chunk_block(struct mdrestore_struct *mdres, u8 *buffer,
			    u64 bytenr, u64 item_bytenr, u32 bufsize,
			    u64 cluster_bytenr)
{
	struct extent_buffer *eb;
	int ret = 0;
	int i;

	eb = alloc_dummy_eb(bytenr, mdres->nodesize);
	if (!eb) {
		ret = -ENOMEM;
		goto out;
	}

	/* Step to the node at @bytenr within the item's data */
	while (item_bytenr != bytenr) {
		buffer += mdres->nodesize;
		item_bytenr += mdres->nodesize;
	}

	memcpy(eb->data, buffer, mdres->nodesize);
	if (btrfs_header_bytenr(eb) != bytenr) {
		error("eb bytenr does not match found bytenr: %llu != %llu",
				(unsigned long long)btrfs_header_bytenr(eb),
				(unsigned long long)bytenr);
		ret = -EIO;
		goto out;
	}

	if (memcmp(mdres->fsid, eb->data + offsetof(struct btrfs_header, fsid),
		   BTRFS_FSID_SIZE)) {
		error("filesystem UUID of eb %llu does not match",
				(unsigned long long)bytenr);
		ret = -EIO;
		goto out;
	}

	if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID) {
		error("wrong eb %llu owner %llu",
				(unsigned long long)bytenr,
				(unsigned long long)btrfs_header_owner(eb));
		ret = -EIO;
		goto out;
	}

	for (i = 0; i < btrfs_header_nritems(eb); i++) {
		struct btrfs_chunk *chunk;
		struct fs_chunk *fs_chunk;
		struct btrfs_key key;
		u64 type;

		/* Internal node: chase each child block pointer */
		if (btrfs_header_level(eb)) {
			u64 blockptr = btrfs_node_blockptr(eb, i);

			ret = search_for_chunk_blocks(mdres, blockptr,
						      cluster_bytenr);
			if (ret)
				break;
			continue;
		}

		/* Yay a leaf!  We loves leafs! */
		btrfs_item_key_to_cpu(eb, &key, i);
		if (key.type != BTRFS_CHUNK_ITEM_KEY)
			continue;

		fs_chunk = malloc(sizeof(struct fs_chunk));
		if (!fs_chunk) {
			error("not enough memory to allocate chunk");
			ret = -ENOMEM;
			break;
		}
		memset(fs_chunk, 0, sizeof(*fs_chunk));
		chunk = btrfs_item_ptr(eb, i, struct btrfs_chunk);

		fs_chunk->logical = key.offset;
		fs_chunk->physical = btrfs_stripe_offset_nr(eb, chunk, 0);
		fs_chunk->bytes = btrfs_chunk_length(eb, chunk);
		INIT_LIST_HEAD(&fs_chunk->list);
		/*
		 * Physically overlapping chunks go on a side list instead
		 * of the physical tree so they can be handled separately.
		 */
		if (tree_search(&mdres->physical_tree, &fs_chunk->p,
				physical_cmp, 1) != NULL)
			list_add(&fs_chunk->list, &mdres->overlapping_chunks);
		else
			tree_insert(&mdres->physical_tree, &fs_chunk->p,
				    physical_cmp);

		type = btrfs_chunk_type(eb, chunk);
		if (type & BTRFS_BLOCK_GROUP_DUP) {
			fs_chunk->physical_dup =
					btrfs_stripe_offset_nr(eb, chunk, 1);
		}

		/* Track the highest physical end seen across all stripes */
		if (fs_chunk->physical_dup + fs_chunk->bytes >
		    mdres->last_physical_offset)
			mdres->last_physical_offset = fs_chunk->physical_dup +
				fs_chunk->bytes;
		else if (fs_chunk->physical + fs_chunk->bytes >
		    mdres->last_physical_offset)
			mdres->last_physical_offset = fs_chunk->physical +
				fs_chunk->bytes;
		mdres->alloced_chunks += fs_chunk->bytes;
		/* in dup case, fs_chunk->bytes should add twice */
		if (fs_chunk->physical_dup)
			mdres->alloced_chunks += fs_chunk->bytes;
		tree_insert(&mdres->chunk_tree, &fs_chunk->l, chunk_cmp);
	}
out:
	free(eb);
	return ret;
}
2201
/*
 * Locate and parse the chunk-tree block containing logical address
 * @search by scanning the image file cluster by cluster, starting at
 * @cluster_bytenr.  On hitting end of file the scan wraps around to the
 * start of the image once.  Compressed items are decompressed before
 * matching.  Returns 0 once the block has been handed to
 * read_chunk_block(), or a negative errno.
 */
/* If you have to ask you aren't worthy */
static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
				   u64 search, u64 cluster_bytenr)
{
	struct meta_cluster *cluster;
	struct meta_cluster_header *header;
	struct meta_cluster_item *item;
	u64 current_cluster = cluster_bytenr, bytenr;
	u64 item_bytenr;
	u32 bufsize, nritems, i;
	u32 max_size = MAX_PENDING_SIZE * 2;
	u8 *buffer, *tmp = NULL;
	int ret = 0;

	cluster = malloc(BLOCK_SIZE);
	if (!cluster) {
		error("not enough memory for cluster");
		return -ENOMEM;
	}

	buffer = malloc(max_size);
	if (!buffer) {
		error("not enough memory for buffer");
		free(cluster);
		return -ENOMEM;
	}

	/* Scratch buffer for the compressed item data, zlib images only */
	if (mdres->compress_method == COMPRESS_ZLIB) {
		tmp = malloc(max_size);
		if (!tmp) {
			error("not enough memory for buffer");
			free(cluster);
			free(buffer);
			return -ENOMEM;
		}
	}

	bytenr = current_cluster;
	while (1) {
		if (fseek(mdres->in, current_cluster, SEEK_SET)) {
			error("seek failed: %s", strerror(errno));
			ret = -EIO;
			break;
		}

		ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
		if (ret == 0) {
			/* EOF: wrap around to the start of the image once */
			if (cluster_bytenr != 0) {
				cluster_bytenr = 0;
				current_cluster = 0;
				bytenr = 0;
				continue;
			}
			error(
	"unknown state after reading cluster at %llu, probably corrupted data",
					cluster_bytenr);
			ret = -EIO;
			break;
		} else if (ret < 0) {
			/*
			 * NOTE(review): fread() returns size_t, so this
			 * branch looks unreachable -- confirm before relying
			 * on it for error reporting.
			 */
			error("unable to read image at %llu: %s",
					(unsigned long long)cluster_bytenr,
					strerror(errno));
			break;
		}
		ret = 0;

		header = &cluster->header;
		if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
		    le64_to_cpu(header->bytenr) != current_cluster) {
			error("bad header in metadump image");
			ret = -EIO;
			break;
		}

		/* Walk every item in this cluster looking for @search */
		bytenr += BLOCK_SIZE;
		nritems = le32_to_cpu(header->nritems);
		for (i = 0; i < nritems; i++) {
			size_t size;

			item = &cluster->items[i];
			bufsize = le32_to_cpu(item->size);
			item_bytenr = le64_to_cpu(item->bytenr);

			if (bufsize > max_size) {
				error("item %u too big: %u > %u", i, bufsize,
						max_size);
				ret = -EIO;
				break;
			}

			if (mdres->compress_method == COMPRESS_ZLIB) {
				ret = fread(tmp, bufsize, 1, mdres->in);
				if (ret != 1) {
					error("read error: %s", strerror(errno));
					ret = -EIO;
					break;
				}

				size = max_size;
				ret = uncompress(buffer,
						 (unsigned long *)&size, tmp,
						 bufsize);
				if (ret != Z_OK) {
					error("decompression failed with %d",
							ret);
					ret = -EIO;
					break;
				}
			} else {
				ret = fread(buffer, bufsize, 1, mdres->in);
				if (ret != 1) {
					error("read error: %s",
							strerror(errno));
					ret = -EIO;
					break;
				}
				size = bufsize;
			}
			ret = 0;

			/* Found the item covering @search: parse and stop */
			if (item_bytenr <= search &&
			    item_bytenr + size > search) {
				ret = read_chunk_block(mdres, buffer, search,
						       item_bytenr, size,
						       current_cluster);
				if (!ret)
					ret = 1;
				break;
			}
			bytenr += bufsize;
		}
		/* ret > 0 is the found marker set above, not an error */
		if (ret) {
			if (ret > 0)
				ret = 0;
			break;
		}
		/* Clusters are BLOCK_SIZE aligned; skip the padding */
		if (bytenr & BLOCK_MASK)
			bytenr += BLOCK_SIZE - (bytenr & BLOCK_MASK);
		current_cluster = bytenr;
	}

	free(tmp);
	free(buffer);
	free(cluster);
	return ret;
}
2348
2349 static int build_chunk_tree(struct mdrestore_struct *mdres,
2350                             struct meta_cluster *cluster)
2351 {
2352         struct btrfs_super_block *super;
2353         struct meta_cluster_header *header;
2354         struct meta_cluster_item *item = NULL;
2355         u64 chunk_root_bytenr = 0;
2356         u32 i, nritems;
2357         u64 bytenr = 0;
2358         u8 *buffer;
2359         int ret;
2360
2361         /* We can't seek with stdin so don't bother doing this */
2362         if (mdres->in == stdin)
2363                 return 0;
2364
2365         ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
2366         if (ret <= 0) {
2367                 error("unable to read cluster: %s", strerror(errno));
2368                 return -EIO;
2369         }
2370         ret = 0;
2371
2372         header = &cluster->header;
2373         if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
2374             le64_to_cpu(header->bytenr) != 0) {
2375                 error("bad header in metadump image");
2376                 return -EIO;
2377         }
2378
2379         bytenr += BLOCK_SIZE;
2380         mdres->compress_method = header->compress;
2381         nritems = le32_to_cpu(header->nritems);
2382         for (i = 0; i < nritems; i++) {
2383                 item = &cluster->items[i];
2384
2385                 if (le64_to_cpu(item->bytenr) == BTRFS_SUPER_INFO_OFFSET)
2386                         break;
2387                 bytenr += le32_to_cpu(item->size);
2388                 if (fseek(mdres->in, le32_to_cpu(item->size), SEEK_CUR)) {
2389                         error("seek failed: %s", strerror(errno));
2390                         return -EIO;
2391                 }
2392         }
2393
2394         if (!item || le64_to_cpu(item->bytenr) != BTRFS_SUPER_INFO_OFFSET) {
2395                 error("did not find superblock at %llu",
2396                                 le64_to_cpu(item->bytenr));
2397                 return -EINVAL;
2398         }
2399
2400         buffer = malloc(le32_to_cpu(item->size));
2401         if (!buffer) {
2402                 error("not enough memory to allocate buffer");
2403                 return -ENOMEM;
2404         }
2405
2406         ret = fread(buffer, le32_to_cpu(item->size), 1, mdres->in);
2407         if (ret != 1) {
2408                 error("unable to read buffer: %s", strerror(errno));
2409                 free(buffer);
2410                 return -EIO;
2411         }
2412
2413         if (mdres->compress_method == COMPRESS_ZLIB) {
2414                 size_t size = MAX_PENDING_SIZE * 2;
2415                 u8 *tmp;
2416
2417                 tmp = malloc(MAX_PENDING_SIZE * 2);
2418                 if (!tmp) {
2419                         free(buffer);
2420                         return -ENOMEM;
2421                 }
2422                 ret = uncompress(tmp, (unsigned long *)&size,
2423                                  buffer, le32_to_cpu(item->size));
2424                 if (ret != Z_OK) {
2425                         error("decompression failed with %d", ret);
2426                         free(buffer);
2427                         free(tmp);
2428                         return -EIO;
2429                 }
2430                 free(buffer);
2431                 buffer = tmp;
2432         }
2433
2434         pthread_mutex_lock(&mdres->mutex);
2435         super = (struct btrfs_super_block *)buffer;
2436         chunk_root_bytenr = btrfs_super_chunk_root(super);
2437         mdres->nodesize = btrfs_super_nodesize(super);
2438         memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
2439         memcpy(mdres->uuid, super->dev_item.uuid,
2440                        BTRFS_UUID_SIZE);
2441         mdres->devid = le64_to_cpu(super->dev_item.devid);
2442         free(buffer);
2443         pthread_mutex_unlock(&mdres->mutex);
2444
2445         return search_for_chunk_blocks(mdres, chunk_root_bytenr, 0);
2446 }
2447
2448 static int range_contains_super(u64 physical, u64 bytes)
2449 {
2450         u64 super_bytenr;
2451         int i;
2452
2453         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2454                 super_bytenr = btrfs_sb_offset(i);
2455                 if (super_bytenr >= physical &&
2456                     super_bytenr < physical + bytes)
2457                         return 1;
2458         }
2459
2460         return 0;
2461 }
2462
2463 static void remap_overlapping_chunks(struct mdrestore_struct *mdres)
2464 {
2465         struct fs_chunk *fs_chunk;
2466
2467         while (!list_empty(&mdres->overlapping_chunks)) {
2468                 fs_chunk = list_first_entry(&mdres->overlapping_chunks,
2469                                             struct fs_chunk, list);
2470                 list_del_init(&fs_chunk->list);
2471                 if (range_contains_super(fs_chunk->physical,
2472                                          fs_chunk->bytes)) {
2473                         warning(
2474 "remapping a chunk that had a super mirror inside of it, clearing space cache so we don't end up with corruption");
2475                         mdres->clear_space_cache = 1;
2476                 }
2477                 fs_chunk->physical = mdres->last_physical_offset;
2478                 tree_insert(&mdres->physical_tree, &fs_chunk->p, physical_cmp);
2479                 mdres->last_physical_offset += fs_chunk->bytes;
2480         }
2481 }
2482
2483 static int fixup_devices(struct btrfs_fs_info *fs_info,
2484                          struct mdrestore_struct *mdres, off_t dev_size)
2485 {
2486         struct btrfs_trans_handle *trans;
2487         struct btrfs_dev_item *dev_item;
2488         struct btrfs_path path;
2489         struct extent_buffer *leaf;
2490         struct btrfs_root *root = fs_info->chunk_root;
2491         struct btrfs_key key;
2492         u64 devid, cur_devid;
2493         int ret;
2494
2495         trans = btrfs_start_transaction(fs_info->tree_root, 1);
2496         if (IS_ERR(trans)) {
2497                 error("cannot starting transaction %ld", PTR_ERR(trans));
2498                 return PTR_ERR(trans);
2499         }
2500
2501         dev_item = &fs_info->super_copy->dev_item;
2502
2503         devid = btrfs_stack_device_id(dev_item);
2504
2505         btrfs_set_stack_device_total_bytes(dev_item, dev_size);
2506         btrfs_set_stack_device_bytes_used(dev_item, mdres->alloced_chunks);
2507
2508         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2509         key.type = BTRFS_DEV_ITEM_KEY;
2510         key.offset = 0;
2511
2512         btrfs_init_path(&path);
2513
2514 again:
2515         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
2516         if (ret < 0) {
2517                 error("search failed: %d", ret);
2518                 exit(1);
2519         }
2520
2521         while (1) {
2522                 leaf = path.nodes[0];
2523                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2524                         ret = btrfs_next_leaf(root, &path);
2525                         if (ret < 0) {
2526                                 error("cannot go to next leaf %d", ret);
2527                                 exit(1);
2528                         }
2529                         if (ret > 0) {
2530                                 ret = 0;
2531                                 break;
2532                         }
2533                         leaf = path.nodes[0];
2534                 }
2535
2536                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2537                 if (key.type > BTRFS_DEV_ITEM_KEY)
2538                         break;
2539                 if (key.type != BTRFS_DEV_ITEM_KEY) {
2540                         path.slots[0]++;
2541                         continue;
2542                 }
2543
2544                 dev_item = btrfs_item_ptr(leaf, path.slots[0],
2545                                           struct btrfs_dev_item);
2546                 cur_devid = btrfs_device_id(leaf, dev_item);
2547                 if (devid != cur_devid) {
2548                         ret = btrfs_del_item(trans, root, &path);
2549                         if (ret) {
2550                                 error("cannot delete item: %d", ret);
2551                                 exit(1);
2552                         }
2553                         btrfs_release_path(&path);
2554                         goto again;
2555                 }
2556
2557                 btrfs_set_device_total_bytes(leaf, dev_item, dev_size);
2558                 btrfs_set_device_bytes_used(leaf, dev_item,
2559                                             mdres->alloced_chunks);
2560                 btrfs_mark_buffer_dirty(leaf);
2561                 path.slots[0]++;
2562         }
2563
2564         btrfs_release_path(&path);
2565         ret = btrfs_commit_transaction(trans, fs_info->tree_root);
2566         if (ret) {
2567                 error("unable to commit transaction: %d", ret);
2568                 return ret;
2569         }
2570         return 0;
2571 }
2572
2573 static int restore_metadump(const char *input, FILE *out, int old_restore,
2574                             int num_threads, int fixup_offset,
2575                             const char *target, int multi_devices)
2576 {
2577         struct meta_cluster *cluster = NULL;
2578         struct meta_cluster_header *header;
2579         struct mdrestore_struct mdrestore;
2580         struct btrfs_fs_info *info = NULL;
2581         u64 bytenr = 0;
2582         FILE *in = NULL;
2583         int ret = 0;
2584
2585         if (!strcmp(input, "-")) {
2586                 in = stdin;
2587         } else {
2588                 in = fopen(input, "r");
2589                 if (!in) {
2590                         error("unable to open metadump image: %s",
2591                                         strerror(errno));
2592                         return 1;
2593                 }
2594         }
2595
2596         /* NOTE: open with write mode */
2597         if (fixup_offset) {
2598                 info = open_ctree_fs_info(target, 0, 0, 0,
2599                                           OPEN_CTREE_WRITES |
2600                                           OPEN_CTREE_RESTORE |
2601                                           OPEN_CTREE_PARTIAL);
2602                 if (!info) {
2603                         error("open ctree failed");
2604                         ret = -EIO;
2605                         goto failed_open;
2606                 }
2607         }
2608
2609         cluster = malloc(BLOCK_SIZE);
2610         if (!cluster) {
2611                 error("not enough memory for cluster");
2612                 ret = -ENOMEM;
2613                 goto failed_info;
2614         }
2615
2616         ret = mdrestore_init(&mdrestore, in, out, old_restore, num_threads,
2617                              fixup_offset, info, multi_devices);
2618         if (ret) {
2619                 error("failed to initialize metadata restore state: %d", ret);
2620                 goto failed_cluster;
2621         }
2622
2623         if (!multi_devices && !old_restore) {
2624                 ret = build_chunk_tree(&mdrestore, cluster);
2625                 if (ret)
2626                         goto out;
2627                 if (!list_empty(&mdrestore.overlapping_chunks))
2628                         remap_overlapping_chunks(&mdrestore);
2629         }
2630
2631         if (in != stdin && fseek(in, 0, SEEK_SET)) {
2632                 error("seek failed: %s", strerror(errno));
2633                 goto out;
2634         }
2635
2636         while (!mdrestore.error) {
2637                 ret = fread(cluster, BLOCK_SIZE, 1, in);
2638                 if (!ret)
2639                         break;
2640
2641                 header = &cluster->header;
2642                 if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
2643                     le64_to_cpu(header->bytenr) != bytenr) {
2644                         error("bad header in metadump image");
2645                         ret = -EIO;
2646                         break;
2647                 }
2648                 ret = add_cluster(cluster, &mdrestore, &bytenr);
2649                 if (ret) {
2650                         error("failed to add cluster: %d", ret);
2651                         break;
2652                 }
2653         }
2654         ret = wait_for_worker(&mdrestore);
2655
2656         if (!ret && !multi_devices && !old_restore) {
2657                 struct btrfs_root *root;
2658                 struct stat st;
2659
2660                 root = open_ctree_fd(fileno(out), target, 0,
2661                                           OPEN_CTREE_PARTIAL |
2662                                           OPEN_CTREE_WRITES |
2663                                           OPEN_CTREE_NO_DEVICES);
2664                 if (!root) {
2665                         error("open ctree failed in %s", target);
2666                         ret = -EIO;
2667                         goto out;
2668                 }
2669                 info = root->fs_info;
2670
2671                 if (stat(target, &st)) {
2672                         error("stat %s failed: %s", target, strerror(errno));
2673                         close_ctree(info->chunk_root);
2674                         free(cluster);
2675                         return 1;
2676                 }
2677
2678                 ret = fixup_devices(info, &mdrestore, st.st_size);
2679                 close_ctree(info->chunk_root);
2680                 if (ret)
2681                         goto out;
2682         }
2683 out:
2684         mdrestore_destroy(&mdrestore, num_threads);
2685 failed_cluster:
2686         free(cluster);
2687 failed_info:
2688         if (fixup_offset && info)
2689                 close_ctree(info->chunk_root);
2690 failed_open:
2691         if (in != stdin)
2692                 fclose(in);
2693         return ret;
2694 }
2695
2696 static int update_disk_super_on_device(struct btrfs_fs_info *info,
2697                                        const char *other_dev, u64 cur_devid)
2698 {
2699         struct btrfs_key key;
2700         struct extent_buffer *leaf;
2701         struct btrfs_path path;
2702         struct btrfs_dev_item *dev_item;
2703         struct btrfs_super_block *disk_super;
2704         char dev_uuid[BTRFS_UUID_SIZE];
2705         char fs_uuid[BTRFS_UUID_SIZE];
2706         u64 devid, type, io_align, io_width;
2707         u64 sector_size, total_bytes, bytes_used;
2708         char buf[BTRFS_SUPER_INFO_SIZE];
2709         int fp = -1;
2710         int ret;
2711
2712         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2713         key.type = BTRFS_DEV_ITEM_KEY;
2714         key.offset = cur_devid;
2715
2716         btrfs_init_path(&path);
2717         ret = btrfs_search_slot(NULL, info->chunk_root, &key, &path, 0, 0); 
2718         if (ret) {
2719                 error("search key failed: %d", ret);
2720                 ret = -EIO;
2721                 goto out;
2722         }
2723
2724         leaf = path.nodes[0];
2725         dev_item = btrfs_item_ptr(leaf, path.slots[0],
2726                                   struct btrfs_dev_item);
2727
2728         devid = btrfs_device_id(leaf, dev_item);
2729         if (devid != cur_devid) {
2730                 error("devid mismatch: %llu != %llu",
2731                                 (unsigned long long)devid,
2732                                 (unsigned long long)cur_devid);
2733                 ret = -EIO;
2734                 goto out;
2735         }
2736
2737         type = btrfs_device_type(leaf, dev_item);
2738         io_align = btrfs_device_io_align(leaf, dev_item);
2739         io_width = btrfs_device_io_width(leaf, dev_item);
2740         sector_size = btrfs_device_sector_size(leaf, dev_item);
2741         total_bytes = btrfs_device_total_bytes(leaf, dev_item);
2742         bytes_used = btrfs_device_bytes_used(leaf, dev_item);
2743         read_extent_buffer(leaf, dev_uuid, (unsigned long)btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE);
2744         read_extent_buffer(leaf, fs_uuid, (unsigned long)btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE);
2745
2746         btrfs_release_path(&path);
2747
2748         printf("update disk super on %s devid=%llu\n", other_dev, devid);
2749
2750         /* update other devices' super block */
2751         fp = open(other_dev, O_CREAT | O_RDWR, 0600);
2752         if (fp < 0) {
2753                 error("could not open %s: %s", other_dev, strerror(errno));
2754                 ret = -EIO;
2755                 goto out;
2756         }
2757
2758         memcpy(buf, info->super_copy, BTRFS_SUPER_INFO_SIZE);
2759
2760         disk_super = (struct btrfs_super_block *)buf;
2761         dev_item = &disk_super->dev_item;
2762
2763         btrfs_set_stack_device_type(dev_item, type);
2764         btrfs_set_stack_device_id(dev_item, devid);
2765         btrfs_set_stack_device_total_bytes(dev_item, total_bytes);
2766         btrfs_set_stack_device_bytes_used(dev_item, bytes_used);
2767         btrfs_set_stack_device_io_align(dev_item, io_align);
2768         btrfs_set_stack_device_io_width(dev_item, io_width);
2769         btrfs_set_stack_device_sector_size(dev_item, sector_size);
2770         memcpy(dev_item->uuid, dev_uuid, BTRFS_UUID_SIZE);
2771         memcpy(dev_item->fsid, fs_uuid, BTRFS_UUID_SIZE);
2772         csum_block((u8 *)buf, BTRFS_SUPER_INFO_SIZE);
2773
2774         ret = pwrite64(fp, buf, BTRFS_SUPER_INFO_SIZE, BTRFS_SUPER_INFO_OFFSET);
2775         if (ret != BTRFS_SUPER_INFO_SIZE) {
2776                 if (ret < 0)
2777                         error("cannot write superblock: %s", strerror(ret));
2778                 else
2779                         error("cannot write superblock");
2780                 ret = -EIO;
2781                 goto out;
2782         }
2783
2784         write_backup_supers(fp, (u8 *)buf);
2785
2786 out:
2787         if (fp != -1)
2788                 close(fp);
2789         return ret;
2790 }
2791
/*
 * Print the command-line usage text and terminate with @ret as the
 * process exit status (0 for explicit --help, non-zero otherwise).
 */
static void print_usage(int ret)
{
	static const char * const usage_lines[] = {
		"usage: btrfs-image [options] source target\n",
		"\t-r      \trestore metadump image\n",
		"\t-c value\tcompression level (0 ~ 9)\n",
		"\t-t value\tnumber of threads (1 ~ 32)\n",
		"\t-o      \tdon't mess with the chunk tree when restoring\n",
		"\t-s      \tsanitize file names, use once to just use garbage, use twice if you want crc collisions\n",
		"\t-w      \twalk all trees instead of using extent tree, do this if your extent tree is broken\n",
		"\t-m       \trestore for multiple devices\n",
		"\n",
		"\tIn the dump mode, source is the btrfs device and target is the output file (use '-' for stdout).\n",
		"\tIn the restore mode, source is the dumped image and target is the btrfs device/file.\n",
	};
	size_t i;

	for (i = 0; i < sizeof(usage_lines) / sizeof(usage_lines[0]); i++)
		fputs(usage_lines[i], stdout);
	exit(ret);
}
2807
2808 int main(int argc, char *argv[])
2809 {
2810         char *source;
2811         char *target;
2812         u64 num_threads = 0;
2813         u64 compress_level = 0;
2814         int create = 1;
2815         int old_restore = 0;
2816         int walk_trees = 0;
2817         int multi_devices = 0;
2818         int ret;
2819         enum sanitize_mode sanitize = SANITIZE_NONE;
2820         int dev_cnt = 0;
2821         int usage_error = 0;
2822         FILE *out;
2823
2824         while (1) {
2825                 static const struct option long_options[] = {
2826                         { "help", no_argument, NULL, GETOPT_VAL_HELP},
2827                         { NULL, 0, NULL, 0 }
2828                 };
2829                 int c = getopt_long(argc, argv, "rc:t:oswm", long_options, NULL);
2830                 if (c < 0)
2831                         break;
2832                 switch (c) {
2833                 case 'r':
2834                         create = 0;
2835                         break;
2836                 case 't':
2837                         num_threads = arg_strtou64(optarg);
2838                         if (num_threads > MAX_WORKER_THREADS) {
2839                                 error("number of threads out of range: %llu > %d",
2840                                         (unsigned long long)num_threads,
2841                                         MAX_WORKER_THREADS);
2842                                 return 1;
2843                         }
2844                         break;
2845                 case 'c':
2846                         compress_level = arg_strtou64(optarg);
2847                         if (compress_level > 9) {
2848                                 error("compression level out of range: %llu",
2849                                         (unsigned long long)compress_level);
2850                                 return 1;
2851                         }
2852                         break;
2853                 case 'o':
2854                         old_restore = 1;
2855                         break;
2856                 case 's':
2857                         if (sanitize == SANITIZE_NONE)
2858                                 sanitize = SANITIZE_NAMES;
2859                         else if (sanitize == SANITIZE_NAMES)
2860                                 sanitize = SANITIZE_COLLISIONS;
2861                         break;
2862                 case 'w':
2863                         walk_trees = 1;
2864                         break;
2865                 case 'm':
2866                         create = 0;
2867                         multi_devices = 1;
2868                         break;
2869                         case GETOPT_VAL_HELP:
2870                 default:
2871                         print_usage(c != GETOPT_VAL_HELP);
2872                 }
2873         }
2874
2875         set_argv0(argv);
2876         if (check_argc_min(argc - optind, 2))
2877                 print_usage(1);
2878
2879         dev_cnt = argc - optind - 1;
2880
2881         if (create) {
2882                 if (old_restore) {
2883                         error(
2884                         "create and restore cannot be used at the same time");
2885                         usage_error++;
2886                 }
2887         } else {
2888                 if (walk_trees || sanitize != SANITIZE_NONE || compress_level) {
2889                         error(
2890                         "useing -w, -s, -c options for restore makes no sense");
2891                         usage_error++;
2892                 }
2893                 if (multi_devices && dev_cnt < 2) {
2894                         error("not enough devices specified for -m option");
2895                         usage_error++;
2896                 }
2897                 if (!multi_devices && dev_cnt != 1) {
2898                         error("accepts only 1 device without -m option");
2899                         usage_error++;
2900                 }
2901         }
2902
2903         if (usage_error)
2904                 print_usage(1);
2905
2906         source = argv[optind];
2907         target = argv[optind + 1];
2908
2909         if (create && !strcmp(target, "-")) {
2910                 out = stdout;
2911         } else {
2912                 out = fopen(target, "w+");
2913                 if (!out) {
2914                         error("unable to create target file %s", target);
2915                         exit(1);
2916                 }
2917         }
2918
2919         if (compress_level > 0 || create == 0) {
2920                 if (num_threads == 0) {
2921                         long tmp = sysconf(_SC_NPROCESSORS_ONLN);
2922
2923                         if (tmp <= 0)
2924                                 tmp = 1;
2925                         num_threads = tmp;
2926                 }
2927         } else {
2928                 num_threads = 0;
2929         }
2930
2931         if (create) {
2932                 ret = check_mounted(source);
2933                 if (ret < 0) {
2934                         warning("unable to check mount status of: %s",
2935                                         strerror(-ret));
2936                 } else if (ret) {
2937                         warning("%s already mounted, results may be inaccurate",
2938                                         source);
2939                 }
2940
2941                 ret = create_metadump(source, out, num_threads,
2942                                       compress_level, sanitize, walk_trees);
2943         } else {
2944                 ret = restore_metadump(source, out, old_restore, num_threads,
2945                                        0, target, multi_devices);
2946         }
2947         if (ret) {
2948                 error("%s failed: %s", (create) ? "create" : "restore",
2949                        strerror(errno));
2950                 goto out;
2951         }
2952
2953          /* extended support for multiple devices */
2954         if (!create && multi_devices) {
2955                 struct btrfs_fs_info *info;
2956                 u64 total_devs;
2957                 int i;
2958
2959                 info = open_ctree_fs_info(target, 0, 0, 0,
2960                                           OPEN_CTREE_PARTIAL |
2961                                           OPEN_CTREE_RESTORE);
2962                 if (!info) {
2963                         error("open ctree failed at %s", target);
2964                         return 1;
2965                 }
2966
2967                 total_devs = btrfs_super_num_devices(info->super_copy);
2968                 if (total_devs != dev_cnt) {
2969                         error("it needs %llu devices but has only %d",
2970                                 total_devs, dev_cnt);
2971                         close_ctree(info->chunk_root);
2972                         goto out;
2973                 }
2974
2975                 /* update super block on other disks */
2976                 for (i = 2; i <= dev_cnt; i++) {
2977                         ret = update_disk_super_on_device(info,
2978                                         argv[optind + i], (u64)i);
2979                         if (ret) {
2980                                 error("update disk superblock failed devid %d: %d",
2981                                         i, ret);
2982                                 close_ctree(info->chunk_root);
2983                                 exit(1);
2984                         }
2985                 }
2986
2987                 close_ctree(info->chunk_root);
2988
2989                 /* fix metadata block to map correct chunk */
2990                 ret = restore_metadump(source, out, 0, num_threads, 1,
2991                                        target, 1);
2992                 if (ret) {
2993                         error("unable to fixup metadump: %d", ret);
2994                         exit(1);
2995                 }
2996         }
2997 out:
2998         if (out == stdout) {
2999                 fflush(out);
3000         } else {
3001                 fclose(out);
3002                 if (ret && create) {
3003                         int unlink_ret;
3004
3005                         unlink_ret = unlink(target);
3006                         if (unlink_ret)
3007                                 error("unlink output file %s failed: %s",
3008                                                 target, strerror(errno));
3009                 }
3010         }
3011
3012         btrfs_close_all_devices();
3013
3014         return !!ret;
3015 }