btrfs-progs: image: move metadump definitions to own header
[platform/upstream/btrfs-progs.git] / image / main.c
1 /*
2  * Copyright (C) 2008 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <pthread.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <fcntl.h>
25 #include <unistd.h>
26 #include <dirent.h>
27 #include <zlib.h>
28 #include <getopt.h>
29
30 #include "kerncompat.h"
31 #include "crc32c.h"
32 #include "ctree.h"
33 #include "disk-io.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "volumes.h"
37 #include "extent_io.h"
38 #include "help.h"
39 #include "image/metadump.h"
40
41 #define MAX_WORKER_THREADS      (32)
42
/* A single extent range queued for (optionally compressed) writeout. */
struct async_work {
	struct list_head list;		/* linkage on metadump_struct::list (work queue) */
	struct list_head ordered;	/* linkage on metadump_struct::ordered (output order) */
	u64 start;			/* logical start of the dumped range */
	u64 size;			/* uncompressed size of the range */
	u8 *buffer;			/* data; swapped for compressed copy in dump_worker() */
	size_t bufsize;			/* current size of @buffer */
	int error;			/* non-zero if compression of this item failed */
};
52
/*
 * State for creating a metadata dump: worker threads compress queued
 * extents while the main thread batches them into clusters on @out.
 */
struct metadump_struct {
	struct btrfs_root *root;
	FILE *out;

	/* Current cluster header + item index, padded to one BLOCK_SIZE block */
	union {
		struct meta_cluster cluster;
		char meta_cluster_bytes[BLOCK_SIZE];
	};

	pthread_t threads[MAX_WORKER_THREADS];
	size_t num_threads;
	pthread_mutex_t mutex;		/* protects list, counters and error */
	pthread_cond_t cond;		/* wakes workers on new work or done */
	struct rb_root name_tree;	/* sanitized-name cache (struct name) */

	struct list_head list;		/* items waiting for a worker */
	struct list_head ordered;	/* all items, in output order */
	size_t num_items;		/* items queued for the current cluster */
	size_t num_ready;		/* items the workers have finished */

	u64 pending_start;		/* start of range being accumulated */
	u64 pending_size;		/* size of range being accumulated */

	int compress_level;		/* zlib level; 0 disables compression */
	int done;			/* tells the workers to exit */
	int data;			/* dump data extents too, not just metadata */
	int sanitize_names;		/* 1: garbage names, 2: crc32c-colliding names */

	int error;			/* first worker error (negative errno) */
};
83
/* Cache entry mapping an original file name to its sanitized substitute. */
struct name {
	struct rb_node n;	/* linkage in metadump_struct::name_tree */
	char *val;		/* original name (not NUL-terminated) */
	char *sub;		/* sanitized substitute, same length */
	u32 len;		/* length of both buffers in bytes */
};
90
/* State for restoring an image from @in onto the target @out. */
struct mdrestore_struct {
	FILE *in;
	FILE *out;

	pthread_t threads[MAX_WORKER_THREADS];
	size_t num_threads;
	pthread_mutex_t mutex;		/* protects list, counters and error */
	pthread_cond_t cond;

	struct rb_root chunk_tree;	/* fs_chunk indexed by logical address */
	struct rb_root physical_tree;	/* fs_chunk indexed by physical address */
	struct list_head list;		/* clusters waiting for a worker */
	struct list_head overlapping_chunks;	/* chunks whose physical ranges overlap */
	size_t num_items;
	u32 nodesize;
	u64 devid;
	u64 alloced_chunks;
	u64 last_physical_offset;	/* end of the highest mapped physical range */
	u8 uuid[BTRFS_UUID_SIZE];
	u8 fsid[BTRFS_FSID_SIZE];

	int compress_method;		/* COMPRESS_* taken from the image header */
	int done;
	int error;
	int old_restore;		/* image uses the old (pre-chunk-tree) format */
	int fixup_offset;		/* remap logical offsets while restoring */
	int multi_devices;		/* restoring onto multiple devices */
	int clear_space_cache;		/* wipe the v1 space cache after restore */
	struct btrfs_fs_info *info;
};
121
122 static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
123                                    u64 search, u64 cluster_bytenr);
124 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size);
125
126 static void csum_block(u8 *buf, size_t len)
127 {
128         u8 result[BTRFS_CRC32_SIZE];
129         u32 crc = ~(u32)0;
130         crc = crc32c(crc, buf + BTRFS_CSUM_SIZE, len - BTRFS_CSUM_SIZE);
131         btrfs_csum_final(crc, result);
132         memcpy(buf, result, BTRFS_CRC32_SIZE);
133 }
134
135 static int has_name(struct btrfs_key *key)
136 {
137         switch (key->type) {
138         case BTRFS_DIR_ITEM_KEY:
139         case BTRFS_DIR_INDEX_KEY:
140         case BTRFS_INODE_REF_KEY:
141         case BTRFS_INODE_EXTREF_KEY:
142         case BTRFS_XATTR_ITEM_KEY:
143                 return 1;
144         default:
145                 break;
146         }
147
148         return 0;
149 }
150
151 static char *generate_garbage(u32 name_len)
152 {
153         char *buf = malloc(name_len);
154         int i;
155
156         if (!buf)
157                 return NULL;
158
159         for (i = 0; i < name_len; i++) {
160                 char c = rand_range(94) + 33;
161
162                 if (c == '/')
163                         c++;
164                 buf[i] = c;
165         }
166
167         return buf;
168 }
169
/*
 * rb-tree comparator for the sanitized-name cache.  @fuzz is unused;
 * the signature only matches what tree_insert()/tree_search() expect.
 *
 * NOTE(review): only the first min(len, len) bytes are compared, so a
 * name that is a prefix of a longer cached name compares equal to it.
 * Looks deliberate for lookups of truncated names -- confirm before
 * changing.
 */
static int name_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
{
	struct name *entry = rb_entry(a, struct name, n);
	struct name *ins = rb_entry(b, struct name, n);
	u32 len;

	len = min(ins->len, entry->len);
	return memcmp(ins->val, entry->val, len);
}
179
180 static int chunk_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
181 {
182         struct fs_chunk *entry = rb_entry(a, struct fs_chunk, l);
183         struct fs_chunk *ins = rb_entry(b, struct fs_chunk, l);
184
185         if (fuzz && ins->logical >= entry->logical &&
186             ins->logical < entry->logical + entry->bytes)
187                 return 0;
188
189         if (ins->logical < entry->logical)
190                 return -1;
191         else if (ins->logical > entry->logical)
192                 return 1;
193         return 0;
194 }
195
196 static int physical_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
197 {
198         struct fs_chunk *entry = rb_entry(a, struct fs_chunk, p);
199         struct fs_chunk *ins = rb_entry(b, struct fs_chunk, p);
200
201         if (fuzz && ins->physical >= entry->physical &&
202             ins->physical < entry->physical + entry->bytes)
203                 return 0;
204
205         if (fuzz && entry->physical >= ins->physical &&
206             entry->physical < ins->physical + ins->bytes)
207                 return 0;
208
209         if (ins->physical < entry->physical)
210                 return -1;
211         else if (ins->physical > entry->physical)
212                 return 1;
213         return 0;
214 }
215
216 static void tree_insert(struct rb_root *root, struct rb_node *ins,
217                         int (*cmp)(struct rb_node *a, struct rb_node *b,
218                                    int fuzz))
219 {
220         struct rb_node ** p = &root->rb_node;
221         struct rb_node * parent = NULL;
222         int dir;
223
224         while(*p) {
225                 parent = *p;
226
227                 dir = cmp(*p, ins, 1);
228                 if (dir < 0)
229                         p = &(*p)->rb_left;
230                 else if (dir > 0)
231                         p = &(*p)->rb_right;
232                 else
233                         BUG();
234         }
235
236         rb_link_node(ins, parent, p);
237         rb_insert_color(ins, root);
238 }
239
240 static struct rb_node *tree_search(struct rb_root *root,
241                                    struct rb_node *search,
242                                    int (*cmp)(struct rb_node *a,
243                                               struct rb_node *b, int fuzz),
244                                    int fuzz)
245 {
246         struct rb_node *n = root->rb_node;
247         int dir;
248
249         while (n) {
250                 dir = cmp(n, search, fuzz);
251                 if (dir < 0)
252                         n = n->rb_left;
253                 else if (dir > 0)
254                         n = n->rb_right;
255                 else
256                         return n;
257         }
258
259         return NULL;
260 }
261
/*
 * Map a @logical address from the image to the physical offset it
 * should be restored to, using the chunk tree built from the image.
 *
 * On return *@size is clamped so the range does not cross the chunk's
 * end.  If @physical_dup is non-NULL it receives the offset of the
 * second copy for DUP chunks, or 0 otherwise.  If no chunk covers
 * @logical, the logical address is returned unchanged (with a warning
 * unless reading from stdin, where seeking back to scan for chunk
 * clusters is impossible).
 */
static u64 logical_to_physical(struct mdrestore_struct *mdres, u64 logical,
			       u64 *size, u64 *physical_dup)
{
	struct fs_chunk *fs_chunk;
	struct rb_node *entry;
	struct fs_chunk search;
	u64 offset;

	/* The superblock always lives at its fixed offset */
	if (logical == BTRFS_SUPER_INFO_OFFSET)
		return logical;

	search.logical = logical;
	entry = tree_search(&mdres->chunk_tree, &search.l, chunk_cmp, 1);
	if (!entry) {
		if (mdres->in != stdin)
			warning("cannot find a chunk, using logical");
		return logical;
	}
	fs_chunk = rb_entry(entry, struct fs_chunk, l);
	if (fs_chunk->logical > logical || fs_chunk->logical + fs_chunk->bytes < logical)
		BUG();
	offset = search.logical - fs_chunk->logical;

	if (physical_dup) {
		/* Only in dup case, physical_dup is not equal to 0 */
		if (fs_chunk->physical_dup)
			*physical_dup = fs_chunk->physical_dup + offset;
		else
			*physical_dup = 0;
	}

	/* Don't let the caller run past the end of this chunk */
	*size = min(*size, fs_chunk->bytes + fs_chunk->logical - logical);
	return fs_chunk->physical + offset;
}
296
/*
 * Reverse CRC-32C lookup table, indexed by the top byte of the target
 * checksum.  find_collision_calc_suffix() uses it to run the CRC
 * computation backwards when constructing a 4-byte suffix that forces
 * a desired crc32c value.
 */
static const u32 crc32c_rev_table[256] = {
	0x00000000L,0x05EC76F1L,0x0BD8EDE2L,0x0E349B13L,
	0x17B1DBC4L,0x125DAD35L,0x1C693626L,0x198540D7L,
	0x2F63B788L,0x2A8FC179L,0x24BB5A6AL,0x21572C9BL,
	0x38D26C4CL,0x3D3E1ABDL,0x330A81AEL,0x36E6F75FL,
	0x5EC76F10L,0x5B2B19E1L,0x551F82F2L,0x50F3F403L,
	0x4976B4D4L,0x4C9AC225L,0x42AE5936L,0x47422FC7L,
	0x71A4D898L,0x7448AE69L,0x7A7C357AL,0x7F90438BL,
	0x6615035CL,0x63F975ADL,0x6DCDEEBEL,0x6821984FL,
	0xBD8EDE20L,0xB862A8D1L,0xB65633C2L,0xB3BA4533L,
	0xAA3F05E4L,0xAFD37315L,0xA1E7E806L,0xA40B9EF7L,
	0x92ED69A8L,0x97011F59L,0x9935844AL,0x9CD9F2BBL,
	0x855CB26CL,0x80B0C49DL,0x8E845F8EL,0x8B68297FL,
	0xE349B130L,0xE6A5C7C1L,0xE8915CD2L,0xED7D2A23L,
	0xF4F86AF4L,0xF1141C05L,0xFF208716L,0xFACCF1E7L,
	0xCC2A06B8L,0xC9C67049L,0xC7F2EB5AL,0xC21E9DABL,
	0xDB9BDD7CL,0xDE77AB8DL,0xD043309EL,0xD5AF466FL,
	0x7EF1CAB1L,0x7B1DBC40L,0x75292753L,0x70C551A2L,
	0x69401175L,0x6CAC6784L,0x6298FC97L,0x67748A66L,
	0x51927D39L,0x547E0BC8L,0x5A4A90DBL,0x5FA6E62AL,
	0x4623A6FDL,0x43CFD00CL,0x4DFB4B1FL,0x48173DEEL,
	0x2036A5A1L,0x25DAD350L,0x2BEE4843L,0x2E023EB2L,
	0x37877E65L,0x326B0894L,0x3C5F9387L,0x39B3E576L,
	0x0F551229L,0x0AB964D8L,0x048DFFCBL,0x0161893AL,
	0x18E4C9EDL,0x1D08BF1CL,0x133C240FL,0x16D052FEL,
	0xC37F1491L,0xC6936260L,0xC8A7F973L,0xCD4B8F82L,
	0xD4CECF55L,0xD122B9A4L,0xDF1622B7L,0xDAFA5446L,
	0xEC1CA319L,0xE9F0D5E8L,0xE7C44EFBL,0xE228380AL,
	0xFBAD78DDL,0xFE410E2CL,0xF075953FL,0xF599E3CEL,
	0x9DB87B81L,0x98540D70L,0x96609663L,0x938CE092L,
	0x8A09A045L,0x8FE5D6B4L,0x81D14DA7L,0x843D3B56L,
	0xB2DBCC09L,0xB737BAF8L,0xB90321EBL,0xBCEF571AL,
	0xA56A17CDL,0xA086613CL,0xAEB2FA2FL,0xAB5E8CDEL,
	0xFDE39562L,0xF80FE393L,0xF63B7880L,0xF3D70E71L,
	0xEA524EA6L,0xEFBE3857L,0xE18AA344L,0xE466D5B5L,
	0xD28022EAL,0xD76C541BL,0xD958CF08L,0xDCB4B9F9L,
	0xC531F92EL,0xC0DD8FDFL,0xCEE914CCL,0xCB05623DL,
	0xA324FA72L,0xA6C88C83L,0xA8FC1790L,0xAD106161L,
	0xB49521B6L,0xB1795747L,0xBF4DCC54L,0xBAA1BAA5L,
	0x8C474DFAL,0x89AB3B0BL,0x879FA018L,0x8273D6E9L,
	0x9BF6963EL,0x9E1AE0CFL,0x902E7BDCL,0x95C20D2DL,
	0x406D4B42L,0x45813DB3L,0x4BB5A6A0L,0x4E59D051L,
	0x57DC9086L,0x5230E677L,0x5C047D64L,0x59E80B95L,
	0x6F0EFCCAL,0x6AE28A3BL,0x64D61128L,0x613A67D9L,
	0x78BF270EL,0x7D5351FFL,0x7367CAECL,0x768BBC1DL,
	0x1EAA2452L,0x1B4652A3L,0x1572C9B0L,0x109EBF41L,
	0x091BFF96L,0x0CF78967L,0x02C31274L,0x072F6485L,
	0x31C993DAL,0x3425E52BL,0x3A117E38L,0x3FFD08C9L,
	0x2678481EL,0x23943EEFL,0x2DA0A5FCL,0x284CD30DL,
	0x83125FD3L,0x86FE2922L,0x88CAB231L,0x8D26C4C0L,
	0x94A38417L,0x914FF2E6L,0x9F7B69F5L,0x9A971F04L,
	0xAC71E85BL,0xA99D9EAAL,0xA7A905B9L,0xA2457348L,
	0xBBC0339FL,0xBE2C456EL,0xB018DE7DL,0xB5F4A88CL,
	0xDDD530C3L,0xD8394632L,0xD60DDD21L,0xD3E1ABD0L,
	0xCA64EB07L,0xCF889DF6L,0xC1BC06E5L,0xC4507014L,
	0xF2B6874BL,0xF75AF1BAL,0xF96E6AA9L,0xFC821C58L,
	0xE5075C8FL,0xE0EB2A7EL,0xEEDFB16DL,0xEB33C79CL,
	0x3E9C81F3L,0x3B70F702L,0x35446C11L,0x30A81AE0L,
	0x292D5A37L,0x2CC12CC6L,0x22F5B7D5L,0x2719C124L,
	0x11FF367BL,0x1413408AL,0x1A27DB99L,0x1FCBAD68L,
	0x064EEDBFL,0x03A29B4EL,0x0D96005DL,0x087A76ACL,
	0x605BEEE3L,0x65B79812L,0x6B830301L,0x6E6F75F0L,
	0x77EA3527L,0x720643D6L,0x7C32D8C5L,0x79DEAE34L,
	0x4F38596BL,0x4AD42F9AL,0x44E0B489L,0x410CC278L,
	0x588982AFL,0x5D65F45EL,0x53516F4DL,0x56BD19BCL
};
366
367 /*
368  * Calculate a 4-byte suffix to match desired CRC32C
369  *
370  * @current_crc: CRC32C checksum of all bytes before the suffix
371  * @desired_crc: the checksum that we want to get after adding the suffix
372  *
373  * Outputs: @suffix: pointer to where the suffix will be written (4-bytes)
374  */
375 static void find_collision_calc_suffix(unsigned long current_crc,
376                                        unsigned long desired_crc,
377                                        char *suffix)
378 {
379         int i;
380
381         for(i = 3; i >= 0; i--) {
382                 desired_crc = (desired_crc << 8)
383                             ^ crc32c_rev_table[desired_crc >> 24 & 0xFF]
384                             ^ ((current_crc >> i * 8) & 0xFF);
385         }
386         for (i = 0; i < 4; i++)
387                 suffix[i] = (desired_crc >> i * 8) & 0xFF;
388 }
389
/*
 * Return 1 if all four suffix bytes are printable ASCII and none is
 * '/', i.e. the suffix may legally appear inside a file name.
 */
static int find_collision_is_suffix_valid(const char *suffix)
{
	int i;

	for (i = 0; i < 4; i++) {
		char c = suffix[i];

		if (c < ' ' || c > 126 || c == '/')
			return 0;
	}
	return 1;
}
405
/*
 * Search for a same-length string with the same crc32c as @val->val,
 * writing the result into @val->sub.  The first name_len - 4 bytes are
 * enumerated like an odometer over printable ASCII (skipping '/'); for
 * each prefix the final 4 bytes are computed analytically with
 * find_collision_calc_suffix().  Returns 1 when a collision different
 * from the original name was found, 0 when the search space was
 * exhausted (val->sub contents are then unusable).
 */
static int find_collision_reverse_crc32c(struct name *val, u32 name_len)
{
	unsigned long checksum;
	unsigned long current_checksum;
	int found = 0;
	int i;

	/* There are no same length collisions of 4 or less bytes */
	if (name_len <= 4)
		return 0;
	/* seed ~1 -- presumably to match btrfs name hashing; verify */
	checksum = crc32c(~1, val->val, name_len);
	name_len -= 4;
	memset(val->sub, ' ', name_len);
	i = 0;
	while (1) {
		current_checksum = crc32c(~1, val->sub, name_len);
		find_collision_calc_suffix(current_checksum,
					   checksum,
					   val->sub + name_len);
		/* Suffix must be name-legal and the whole string must differ */
		if (find_collision_is_suffix_valid(val->sub + name_len) &&
		    memcmp(val->sub, val->val, val->len)) {
			found = 1;
			break;
		}

		if (val->sub[i] == 126) {
			/* Position i overflowed: carry into the next position */
			do {
				i++;
				if (i >= name_len)
					break;
			} while (val->sub[i] == 126);

			if (i >= name_len)
				break;
			val->sub[i]++;
			if (val->sub[i] == '/')
				val->sub[i]++;
			/* Reset all lower positions to the first digit, ' ' */
			memset(val->sub, ' ', i);
			i = 0;
			continue;
		} else {
			val->sub[i]++;
			if (val->sub[i] == '/')
				val->sub[i]++;
		}
	}
	return found;
}
454
/*
 * Return a sanitized substitute of @name (length @name_len, not
 * NUL-terminated) with the same crc32c, so directory index hashes
 * still match.  Results are cached in md->name_tree: a given name
 * always maps to the same substitute.
 *
 * Takes ownership of @name: it is freed on a cache hit or on error,
 * otherwise it is stored in the cache entry.  The returned buffer is
 * owned by the cache and must not be freed by the caller.  Returns
 * NULL only on allocation failure.
 */
static char *find_collision(struct metadump_struct *md, char *name,
			    u32 name_len)
{
	struct name *val;
	struct rb_node *entry;
	struct name tmp;
	int found;
	int i;

	tmp.val = name;
	tmp.len = name_len;
	entry = tree_search(&md->name_tree, &tmp.n, name_cmp, 0);
	if (entry) {
		/* Cache hit: reuse the previously computed substitute */
		val = rb_entry(entry, struct name, n);
		free(name);
		return val->sub;
	}

	val = malloc(sizeof(struct name));
	if (!val) {
		error("cannot sanitize name, not enough memory");
		free(name);
		return NULL;
	}

	memset(val, 0, sizeof(*val));

	val->val = name;
	val->len = name_len;
	val->sub = malloc(name_len);
	if (!val->sub) {
		error("cannot sanitize name, not enough memory");
		free(val);
		free(name);
		return NULL;
	}

	found = find_collision_reverse_crc32c(val, name_len);

	if (!found) {
		warning(
"cannot find a hash collision for '%.*s', generating garbage, it won't match indexes",
			val->len, val->val);
		/* Fall back to random printable characters, never '/' */
		for (i = 0; i < name_len; i++) {
			char c = rand_range(94) + 33;

			if (c == '/')
				c++;
			val->sub[i] = c;
		}
	}

	tree_insert(&md->name_tree, &val->n, name_cmp);
	return val->sub;
}
510
/*
 * Replace every file name stored in the dir item(s) at @slot with a
 * sanitized substitute of the same length.  One slot may hold several
 * dir items (hash collisions), hence the loop over total_len.
 *
 * With sanitize_names == 1 the substitute is fresh random garbage and
 * freed here; with > 1 it comes from the find_collision() cache and is
 * freed when the metadump context is destroyed.
 */
static void sanitize_dir_item(struct metadump_struct *md, struct extent_buffer *eb,
			      int slot)
{
	struct btrfs_dir_item *dir_item;
	char *buf;
	char *garbage;
	unsigned long name_ptr;
	u32 total_len;
	u32 cur = 0;
	u32 this_len;
	u32 name_len;
	int free_garbage = (md->sanitize_names == 1);

	dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
	total_len = btrfs_item_size_nr(eb, slot);
	while (cur < total_len) {
		this_len = sizeof(*dir_item) +
			btrfs_dir_name_len(eb, dir_item) +
			btrfs_dir_data_len(eb, dir_item);
		name_ptr = (unsigned long)(dir_item + 1);
		name_len = btrfs_dir_name_len(eb, dir_item);

		if (md->sanitize_names > 1) {
			buf = malloc(name_len);
			if (!buf) {
				error("cannot sanitize name, not enough memory");
				return;
			}
			read_extent_buffer(eb, buf, name_ptr, name_len);
			/* find_collision() takes ownership of buf */
			garbage = find_collision(md, buf, name_len);
		} else {
			garbage = generate_garbage(name_len);
		}
		if (!garbage) {
			error("cannot sanitize name, not enough memory");
			return;
		}
		write_extent_buffer(eb, garbage, name_ptr, name_len);
		cur += this_len;
		dir_item = (struct btrfs_dir_item *)((char *)dir_item +
						     this_len);
		if (free_garbage)
			free(garbage);
	}
}
556
/*
 * Replace the names in the inode ref (@ext == 0) or inode extref
 * (@ext == 1) item at @slot with same-length sanitized substitutes.
 * One item can pack several refs back to back, hence the loop over
 * item_size.
 *
 * Substitute ownership follows sanitize_dir_item(): random garbage
 * (sanitize_names == 1) is freed here, cached collisions are not.
 */
static void sanitize_inode_ref(struct metadump_struct *md,
			       struct extent_buffer *eb, int slot, int ext)
{
	struct btrfs_inode_extref *extref;
	struct btrfs_inode_ref *ref;
	char *garbage, *buf;
	unsigned long ptr;
	unsigned long name_ptr;
	u32 item_size;
	u32 cur_offset = 0;
	int len;
	int free_garbage = (md->sanitize_names == 1);

	item_size = btrfs_item_size_nr(eb, slot);
	ptr = btrfs_item_ptr_offset(eb, slot);
	while (cur_offset < item_size) {
		if (ext) {
			extref = (struct btrfs_inode_extref *)(ptr +
							       cur_offset);
			name_ptr = (unsigned long)(&extref->name);
			len = btrfs_inode_extref_name_len(eb, extref);
			cur_offset += sizeof(*extref);
		} else {
			ref = (struct btrfs_inode_ref *)(ptr + cur_offset);
			len = btrfs_inode_ref_name_len(eb, ref);
			name_ptr = (unsigned long)(ref + 1);
			cur_offset += sizeof(*ref);
		}
		cur_offset += len;

		if (md->sanitize_names > 1) {
			buf = malloc(len);
			if (!buf) {
				error("cannot sanitize name, not enough memory");
				return;
			}
			read_extent_buffer(eb, buf, name_ptr, len);
			/* find_collision() takes ownership of buf */
			garbage = find_collision(md, buf, len);
		} else {
			garbage = generate_garbage(len);
		}

		if (!garbage) {
			error("cannot sanitize name, not enough memory");
			return;
		}
		write_extent_buffer(eb, garbage, name_ptr, len);
		if (free_garbage)
			free(garbage);
	}
}
608
609 static void sanitize_xattr(struct metadump_struct *md,
610                            struct extent_buffer *eb, int slot)
611 {
612         struct btrfs_dir_item *dir_item;
613         unsigned long data_ptr;
614         u32 data_len;
615
616         dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
617         data_len = btrfs_dir_data_len(eb, dir_item);
618
619         data_ptr = (unsigned long)((char *)(dir_item + 1) +
620                                    btrfs_dir_name_len(eb, dir_item));
621         memset_extent_buffer(eb, 0, data_ptr, data_len);
622 }
623
/*
 * Sanitize the name-bearing item at @slot: work on a throwaway copy of
 * metadata block @src, rewrite the names there according to the key
 * type, then copy the whole modified block into the output buffer
 * @dst.  @src itself is never modified.
 */
static void sanitize_name(struct metadump_struct *md, u8 *dst,
			  struct extent_buffer *src, struct btrfs_key *key,
			  int slot)
{
	struct extent_buffer *eb;

	eb = alloc_dummy_eb(src->start, src->len);
	if (!eb) {
		error("cannot sanitize name, not enough memory");
		return;
	}

	memcpy(eb->data, src->data, src->len);

	switch (key->type) {
	case BTRFS_DIR_ITEM_KEY:
	case BTRFS_DIR_INDEX_KEY:
		sanitize_dir_item(md, eb, slot);
		break;
	case BTRFS_INODE_REF_KEY:
		sanitize_inode_ref(md, eb, slot, 0);
		break;
	case BTRFS_INODE_EXTREF_KEY:
		sanitize_inode_ref(md, eb, slot, 1);
		break;
	case BTRFS_XATTR_ITEM_KEY:
		sanitize_xattr(md, eb, slot);
		break;
	default:
		break;
	}

	memcpy(dst, eb->data, eb->len);
	/* NOTE(review): plain free() assumes alloc_dummy_eb() returns a
	 * single allocation -- confirm against its definition */
	free(eb);
}
659
/*
 * Zero item payloads that must not leak into the dump: checksum item
 * data and inline file extent data.  Name-bearing items are rewritten
 * too when name sanitizing is enabled.  @dst is the output copy of
 * leaf @src and is modified in place; @src stays untouched.
 */
static void zero_items(struct metadump_struct *md, u8 *dst,
		       struct extent_buffer *src)
{
	struct btrfs_file_extent_item *fi;
	struct btrfs_item *item;
	struct btrfs_key key;
	u32 nritems = btrfs_header_nritems(src);
	size_t size;
	unsigned long ptr;
	int i, extent_type;

	for (i = 0; i < nritems; i++) {
		item = btrfs_item_nr(i);
		btrfs_item_key_to_cpu(src, &key, i);
		if (key.type == BTRFS_CSUM_ITEM_KEY) {
			/* Wipe the whole checksum payload */
			size = btrfs_item_size_nr(src, i);
			memset(dst + btrfs_leaf_data(src) +
			       btrfs_item_offset_nr(src, i), 0, size);
			continue;
		}

		if (md->sanitize_names && has_name(&key)) {
			sanitize_name(md, dst, src, &key, i);
			continue;
		}

		if (key.type != BTRFS_EXTENT_DATA_KEY)
			continue;

		fi = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(src, fi);
		if (extent_type != BTRFS_FILE_EXTENT_INLINE)
			continue;

		/* Zero only the inline data, keeping the extent item header */
		ptr = btrfs_file_extent_inline_start(fi);
		size = btrfs_file_extent_inline_item_len(src, item);
		memset(dst + ptr, 0, size);
	}
}
702
/*
 * Copy metadata block @src into @dst, blanking everything that carries
 * no information: the unused gap in leaves between item headers and
 * item data, unused key pointer slots in nodes, and useless/sensitive
 * item payloads (via zero_items).  The block checksum is recomputed at
 * the end.  Superblock copies are passed through unmodified.
 */
static void copy_buffer(struct metadump_struct *md, u8 *dst,
			struct extent_buffer *src)
{
	int level;
	size_t size;
	u32 nritems;

	memcpy(dst, src->data, src->len);
	if (src->start == BTRFS_SUPER_INFO_OFFSET)
		return;

	level = btrfs_header_level(src);
	nritems = btrfs_header_nritems(src);

	if (nritems == 0) {
		/* Empty block: everything past the header is dead space */
		size = sizeof(struct btrfs_header);
		memset(dst + size, 0, src->len - size);
	} else if (level == 0) {
		/* Leaf: zero the hole between item headers and item data */
		size = btrfs_leaf_data(src) +
			btrfs_item_offset_nr(src, nritems - 1) -
			btrfs_item_nr_offset(nritems);
		memset(dst + btrfs_item_nr_offset(nritems), 0, size);
		zero_items(md, dst, src);
	} else {
		/* Node: zero everything after the used key pointers */
		size = offsetof(struct btrfs_node, ptrs) +
			sizeof(struct btrfs_key_ptr) * nritems;
		memset(dst + size, 0, src->len - size);
	}
	csum_block(dst, src->len);
}
736
/*
 * Worker thread body: take queued extents off md->list and, when
 * compression is enabled, replace each buffer with its zlib-compressed
 * form.  Exits when md->done is set and the queue is empty.
 *
 * A failed malloc records -ENOMEM in md->error (first error wins) and
 * terminates the thread; a failed compress2() only flags that item via
 * async->error.  Each finished item bumps md->num_ready, which the
 * writer polls; items remain reachable through md->ordered.
 */
static void *dump_worker(void *data)
{
	struct metadump_struct *md = (struct metadump_struct *)data;
	struct async_work *async;
	int ret;

	while (1) {
		pthread_mutex_lock(&md->mutex);
		while (list_empty(&md->list)) {
			if (md->done) {
				pthread_mutex_unlock(&md->mutex);
				goto out;
			}
			pthread_cond_wait(&md->cond, &md->mutex);
		}
		async = list_entry(md->list.next, struct async_work, list);
		list_del_init(&async->list);
		pthread_mutex_unlock(&md->mutex);

		if (md->compress_level > 0) {
			u8 *orig = async->buffer;

			/* compressBound() gives the worst-case output size */
			async->bufsize = compressBound(async->size);
			async->buffer = malloc(async->bufsize);
			if (!async->buffer) {
				error("not enough memory for async buffer");
				pthread_mutex_lock(&md->mutex);
				if (!md->error)
					md->error = -ENOMEM;
				pthread_mutex_unlock(&md->mutex);
				pthread_exit(NULL);
			}

			ret = compress2(async->buffer,
					 (unsigned long *)&async->bufsize,
					 orig, async->size, md->compress_level);

			if (ret != Z_OK)
				async->error = 1;

			free(orig);
		}

		pthread_mutex_lock(&md->mutex);
		md->num_ready++;
		pthread_mutex_unlock(&md->mutex);
	}
out:
	pthread_exit(NULL);
}
787
788 static void meta_cluster_init(struct metadump_struct *md, u64 start)
789 {
790         struct meta_cluster_header *header;
791
792         md->num_items = 0;
793         md->num_ready = 0;
794         header = &md->cluster.header;
795         header->magic = cpu_to_le64(HEADER_MAGIC);
796         header->bytenr = cpu_to_le64(start);
797         header->nritems = cpu_to_le32(0);
798         header->compress = md->compress_level > 0 ?
799                            COMPRESS_ZLIB : COMPRESS_NONE;
800 }
801
/*
 * Tear down a metadump context: wake and join the first @num_threads
 * worker threads, destroy the synchronization primitives and free the
 * sanitized-name cache.
 */
static void metadump_destroy(struct metadump_struct *md, int num_threads)
{
	int i;
	struct rb_node *n;

	pthread_mutex_lock(&md->mutex);
	md->done = 1;
	pthread_cond_broadcast(&md->cond);
	pthread_mutex_unlock(&md->mutex);

	for (i = 0; i < num_threads; i++)
		pthread_join(md->threads[i], NULL);

	pthread_cond_destroy(&md->cond);
	pthread_mutex_destroy(&md->mutex);

	/* Drain the name cache; each entry owns val, sub and itself */
	while ((n = rb_first(&md->name_tree))) {
		struct name *name;

		name = rb_entry(n, struct name, n);
		rb_erase(n, &md->name_tree);
		free(name->val);
		free(name->sub);
		free(name);
	}
}
828
829 static int metadump_init(struct metadump_struct *md, struct btrfs_root *root,
830                          FILE *out, int num_threads, int compress_level,
831                          int sanitize_names)
832 {
833         int i, ret = 0;
834
835         memset(md, 0, sizeof(*md));
836         INIT_LIST_HEAD(&md->list);
837         INIT_LIST_HEAD(&md->ordered);
838         md->root = root;
839         md->out = out;
840         md->pending_start = (u64)-1;
841         md->compress_level = compress_level;
842         md->sanitize_names = sanitize_names;
843         if (sanitize_names > 1)
844                 crc32c_optimization_init();
845
846         md->name_tree.rb_node = NULL;
847         md->num_threads = num_threads;
848         pthread_cond_init(&md->cond, NULL);
849         pthread_mutex_init(&md->mutex, NULL);
850         meta_cluster_init(md, 0);
851
852         if (!num_threads)
853                 return 0;
854
855         for (i = 0; i < num_threads; i++) {
856                 ret = pthread_create(md->threads + i, NULL, dump_worker, md);
857                 if (ret)
858                         break;
859         }
860
861         if (ret)
862                 metadump_destroy(md, i + 1);
863
864         return ret;
865 }
866
867 static int write_zero(FILE *out, size_t size)
868 {
869         static char zero[BLOCK_SIZE];
870         return fwrite(zero, size, 1, out);
871 }
872
/*
 * Flush the current cluster to md->out: first the index block describing
 * every queued buffer, then the (possibly compressed) buffers themselves,
 * then zero padding up to the next BLOCK_SIZE boundary.
 *
 * Must be called with md->mutex held; the lock is dropped and re-taken
 * around the sleep while waiting for the compression workers to finish.
 *
 * @next: set to the byte offset just past the written cluster, used as the
 *        bytenr of the next cluster header.
 *
 * Returns 0 on success, a negative errno-style value on failure.
 */
static int write_buffers(struct metadump_struct *md, u64 *next)
{
	struct meta_cluster_header *header = &md->cluster.header;
	struct meta_cluster_item *item;
	struct async_work *async;
	u64 bytenr = 0;
	u32 nritems = 0;
	int ret;
	int err = 0;

	if (list_empty(&md->ordered))
		goto out;

	/* wait until all buffers are compressed */
	while (!err && md->num_items > md->num_ready) {
		struct timespec ts = {
			.tv_sec = 0,
			.tv_nsec = 10000000,	/* poll every 10ms */
		};
		pthread_mutex_unlock(&md->mutex);
		nanosleep(&ts, NULL);
		pthread_mutex_lock(&md->mutex);
		err = md->error;
	}

	if (err) {
		error("one of the threads failed: %s", strerror(-err));
		goto out;
	}

	/* setup and write index block */
	list_for_each_entry(async, &md->ordered, ordered) {
		item = &md->cluster.items[nritems];
		item->bytenr = cpu_to_le64(async->start);
		item->size = cpu_to_le32(async->bufsize);
		nritems++;
	}
	header->nritems = cpu_to_le32(nritems);

	ret = fwrite(&md->cluster, BLOCK_SIZE, 1, md->out);
	if (ret != 1) {
		error("unable to write out cluster: %s", strerror(errno));
		return -errno;
	}

	/* write buffers */
	bytenr += le64_to_cpu(header->bytenr) + BLOCK_SIZE;
	while (!list_empty(&md->ordered)) {
		async = list_entry(md->ordered.next, struct async_work,
				   ordered);
		list_del_init(&async->ordered);

		/*
		 * After the first write failure keep draining the list so
		 * every async buffer still gets freed; only the write is
		 * skipped.
		 */
		bytenr += async->bufsize;
		if (!err)
			ret = fwrite(async->buffer, async->bufsize, 1,
				     md->out);
		if (ret != 1) {
			error("unable to write out cluster: %s",
				strerror(errno));
			err = -errno;
			ret = 0;
		}

		free(async->buffer);
		free(async);
	}

	/* zero unused space in the last block */
	if (!err && bytenr & BLOCK_MASK) {
		size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);

		bytenr += size;
		ret = write_zero(md->out, size);
		if (ret != 1) {
			error("unable to zero out buffer: %s",
				strerror(errno));
			err = -errno;
		}
	}
out:
	*next = bytenr;
	return err;
}
956
957 static int read_data_extent(struct metadump_struct *md,
958                             struct async_work *async)
959 {
960         struct btrfs_root *root = md->root;
961         struct btrfs_fs_info *fs_info = root->fs_info;
962         u64 bytes_left = async->size;
963         u64 logical = async->start;
964         u64 offset = 0;
965         u64 read_len;
966         int num_copies;
967         int cur_mirror;
968         int ret;
969
970         num_copies = btrfs_num_copies(root->fs_info, logical, bytes_left);
971
972         /* Try our best to read data, just like read_tree_block() */
973         for (cur_mirror = 0; cur_mirror < num_copies; cur_mirror++) {
974                 while (bytes_left) {
975                         read_len = bytes_left;
976                         ret = read_extent_data(fs_info,
977                                         (char *)(async->buffer + offset),
978                                         logical, &read_len, cur_mirror);
979                         if (ret < 0)
980                                 break;
981                         offset += read_len;
982                         logical += read_len;
983                         bytes_left -= read_len;
984                 }
985         }
986         if (bytes_left)
987                 return -EIO;
988         return 0;
989 }
990
991 static int get_dev_fd(struct btrfs_root *root)
992 {
993         struct btrfs_device *dev;
994
995         dev = list_first_entry(&root->fs_info->fs_devices->devices,
996                                struct btrfs_device, dev_list);
997         return dev->fd;
998 }
999
1000 static int flush_pending(struct metadump_struct *md, int done)
1001 {
1002         struct async_work *async = NULL;
1003         struct extent_buffer *eb;
1004         u64 start = 0;
1005         u64 size;
1006         size_t offset;
1007         int ret = 0;
1008
1009         if (md->pending_size) {
1010                 async = calloc(1, sizeof(*async));
1011                 if (!async)
1012                         return -ENOMEM;
1013
1014                 async->start = md->pending_start;
1015                 async->size = md->pending_size;
1016                 async->bufsize = async->size;
1017                 async->buffer = malloc(async->bufsize);
1018                 if (!async->buffer) {
1019                         free(async);
1020                         return -ENOMEM;
1021                 }
1022                 offset = 0;
1023                 start = async->start;
1024                 size = async->size;
1025
1026                 if (md->data) {
1027                         ret = read_data_extent(md, async);
1028                         if (ret) {
1029                                 free(async->buffer);
1030                                 free(async);
1031                                 return ret;
1032                         }
1033                 }
1034
1035                 /*
1036                  * Balance can make the mapping not cover the super block, so
1037                  * just copy directly from one of the devices.
1038                  */
1039                 if (start == BTRFS_SUPER_INFO_OFFSET) {
1040                         int fd = get_dev_fd(md->root);
1041
1042                         ret = pread64(fd, async->buffer, size, start);
1043                         if (ret < size) {
1044                                 free(async->buffer);
1045                                 free(async);
1046                                 error("unable to read superblock at %llu: %s",
1047                                                 (unsigned long long)start,
1048                                                 strerror(errno));
1049                                 return -errno;
1050                         }
1051                         size = 0;
1052                         ret = 0;
1053                 }
1054
1055                 while (!md->data && size > 0) {
1056                         u64 this_read = min((u64)md->root->fs_info->nodesize,
1057                                         size);
1058
1059                         eb = read_tree_block(md->root->fs_info, start, 0);
1060                         if (!extent_buffer_uptodate(eb)) {
1061                                 free(async->buffer);
1062                                 free(async);
1063                                 error("unable to read metadata block %llu",
1064                                         (unsigned long long)start);
1065                                 return -EIO;
1066                         }
1067                         copy_buffer(md, async->buffer + offset, eb);
1068                         free_extent_buffer(eb);
1069                         start += this_read;
1070                         offset += this_read;
1071                         size -= this_read;
1072                 }
1073
1074                 md->pending_start = (u64)-1;
1075                 md->pending_size = 0;
1076         } else if (!done) {
1077                 return 0;
1078         }
1079
1080         pthread_mutex_lock(&md->mutex);
1081         if (async) {
1082                 list_add_tail(&async->ordered, &md->ordered);
1083                 md->num_items++;
1084                 if (md->compress_level > 0) {
1085                         list_add_tail(&async->list, &md->list);
1086                         pthread_cond_signal(&md->cond);
1087                 } else {
1088                         md->num_ready++;
1089                 }
1090         }
1091         if (md->num_items >= ITEMS_PER_CLUSTER || done) {
1092                 ret = write_buffers(md, &start);
1093                 if (ret)
1094                         error("unable to write buffers: %s", strerror(-ret));
1095                 else
1096                         meta_cluster_init(md, start);
1097         }
1098         pthread_mutex_unlock(&md->mutex);
1099         return ret;
1100 }
1101
1102 static int add_extent(u64 start, u64 size, struct metadump_struct *md,
1103                       int data)
1104 {
1105         int ret;
1106         if (md->data != data ||
1107             md->pending_size + size > MAX_PENDING_SIZE ||
1108             md->pending_start + md->pending_size != start) {
1109                 ret = flush_pending(md, 0);
1110                 if (ret)
1111                         return ret;
1112                 md->pending_start = start;
1113         }
1114         readahead_tree_block(md->root->fs_info, start, 0);
1115         md->pending_size += size;
1116         md->data = data;
1117         return 0;
1118 }
1119
1120 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Decide whether the v0-format extent at @bytenr is a tree block by
 * scanning the EXTENT_REF_V0 items that follow it in the extent tree:
 * a reference owned by an internal tree (objectid below
 * BTRFS_FIRST_FREE_OBJECTID) marks the extent as metadata.
 *
 * Advances @path past the inspected items; the caller continues the walk
 * from the new position.
 *
 * Returns 1 for a tree block, 0 for data, negative on lookup error.
 */
static int is_tree_block(struct btrfs_root *extent_root,
			 struct btrfs_path *path, u64 bytenr)
{
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 ref_objectid;
	int ret;

	leaf = path->nodes[0];
	while (1) {
		struct btrfs_extent_ref_v0 *ref_item;
		path->slots[0]++;
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				return ret;
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		/* ran past the refs of this extent: no tree owner found */
		if (key.objectid != bytenr)
			break;
		if (key.type != BTRFS_EXTENT_REF_V0_KEY)
			continue;
		ref_item = btrfs_item_ptr(leaf, path->slots[0],
					  struct btrfs_extent_ref_v0);
		ref_objectid = btrfs_ref_objectid_v0(leaf, ref_item);
		/* first ref decides: internal tree objectid => metadata */
		if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID)
			return 1;
		break;
	}
	return 0;
}
1155 #endif
1156
/*
 * Recursively queue @eb and every block reachable below it for dumping.
 *
 * @root_tree: non-zero when @eb belongs to a root tree; then leaf level-0
 *             items are ROOT_ITEMs and the referenced subtrees are
 *             descended into as well.  For a non-root tree, a level-0 leaf
 *             terminates the recursion.
 *
 * Returns 0 on success, negative on read/queue error.
 */
static int copy_tree_blocks(struct btrfs_root *root, struct extent_buffer *eb,
			    struct metadump_struct *metadump, int root_tree)
{
	struct extent_buffer *tmp;
	struct btrfs_root_item *ri;
	struct btrfs_key key;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 bytenr;
	int level;
	int nritems = 0;
	int i = 0;
	int ret;

	ret = add_extent(btrfs_header_bytenr(eb), fs_info->nodesize,
			 metadump, 0);
	if (ret) {
		error("unable to add metadata block %llu: %d",
				btrfs_header_bytenr(eb), ret);
		return ret;
	}

	/* plain leaf of a non-root tree: nothing further to descend into */
	if (btrfs_header_level(eb) == 0 && !root_tree)
		return 0;

	level = btrfs_header_level(eb);
	nritems = btrfs_header_nritems(eb);
	for (i = 0; i < nritems; i++) {
		if (level == 0) {
			/* root tree leaf: follow each ROOT_ITEM's tree */
			btrfs_item_key_to_cpu(eb, &key, i);
			if (key.type != BTRFS_ROOT_ITEM_KEY)
				continue;
			ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
			bytenr = btrfs_disk_root_bytenr(eb, ri);
			tmp = read_tree_block(fs_info, bytenr, 0);
			/*
			 * NOTE(review): tmp is not freed on this failure
			 * path — possible eb ref leak; confirm
			 * free_extent_buffer() semantics for error pointers
			 * before changing.
			 */
			if (!extent_buffer_uptodate(tmp)) {
				error("unable to read log root block");
				return -EIO;
			}
			ret = copy_tree_blocks(root, tmp, metadump, 0);
			free_extent_buffer(tmp);
			if (ret)
				return ret;
		} else {
			/* interior node: recurse into each child block */
			bytenr = btrfs_node_blockptr(eb, i);
			tmp = read_tree_block(fs_info, bytenr, 0);
			/* NOTE(review): same potential leak as above */
			if (!extent_buffer_uptodate(tmp)) {
				error("unable to read log root block");
				return -EIO;
			}
			ret = copy_tree_blocks(root, tmp, metadump, root_tree);
			free_extent_buffer(tmp);
			if (ret)
				return ret;
		}
	}

	return 0;
}
1215
1216 static int copy_log_trees(struct btrfs_root *root,
1217                           struct metadump_struct *metadump)
1218 {
1219         u64 blocknr = btrfs_super_log_root(root->fs_info->super_copy);
1220
1221         if (blocknr == 0)
1222                 return 0;
1223
1224         if (!root->fs_info->log_root_tree ||
1225             !root->fs_info->log_root_tree->node) {
1226                 error("unable to copy tree log, it has not been setup");
1227                 return -EIO;
1228         }
1229
1230         return copy_tree_blocks(root, root->fs_info->log_root_tree->node,
1231                                 metadump, 1);
1232 }
1233
/*
 * Queue the data extents of the free space cache files (the EXTENT_DATA
 * items living in the tree root) so the cache contents are included in
 * the dump.
 *
 * On error @path may be left positioned; the caller releases it (only the
 * add_extent failure releases it here).
 *
 * Returns 0 on success, negative on error.
 */
static int copy_space_cache(struct btrfs_root *root,
			    struct metadump_struct *metadump,
			    struct btrfs_path *path)
{
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 bytenr, num_bytes;
	int ret;

	/* space cache inodes live in the tree root */
	root = root->fs_info->tree_root;

	key.objectid = 0;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0) {
		error("free space inode not found: %d", ret);
		return ret;
	}

	leaf = path->nodes[0];

	while (1) {
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				error("cannot go to next leaf %d", ret);
				return ret;
			}
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.type != BTRFS_EXTENT_DATA_KEY) {
			path->slots[0]++;
			continue;
		}

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		/* only regular extents reference on-disk cache data */
		if (btrfs_file_extent_type(leaf, fi) !=
		    BTRFS_FILE_EXTENT_REG) {
			path->slots[0]++;
			continue;
		}

		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
		/* queued as data (last argument 1), not metadata */
		ret = add_extent(bytenr, num_bytes, metadump, 1);
		if (ret) {
			error("unable to add space cache blocks %d", ret);
			btrfs_release_path(path);
			return ret;
		}
		path->slots[0]++;
	}

	return 0;
}
1297
/*
 * Walk the extent tree and queue every tree block (EXTENT_ITEMs flagged
 * TREE_BLOCK, all METADATA_ITEMs, and v0 extents whose back refs show a
 * tree owner) for dumping.  Data extents are skipped.
 *
 * Starts just past the first super block copy; releases @path before
 * returning.
 *
 * Returns 0 on success, negative on error.
 */
static int copy_from_extent_tree(struct metadump_struct *metadump,
				 struct btrfs_path *path)
{
	struct btrfs_root *extent_root;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_key key;
	u64 bytenr;
	u64 num_bytes;
	int ret;

	extent_root = metadump->root->fs_info->extent_root;
	/* start scanning right after the primary super block */
	bytenr = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0) {
		error("extent root not found: %d", ret);
		return ret;
	}
	ret = 0;

	leaf = path->nodes[0];

	while (1) {
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0) {
				error("cannot go to next leaf %d", ret);
				break;
			}
			if (ret > 0) {
				/* end of the extent tree: normal finish */
				ret = 0;
				break;
			}
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid < bytenr ||
		    (key.type != BTRFS_EXTENT_ITEM_KEY &&
		     key.type != BTRFS_METADATA_ITEM_KEY)) {
			path->slots[0]++;
			continue;
		}

		bytenr = key.objectid;
		if (key.type == BTRFS_METADATA_ITEM_KEY) {
			/* METADATA_ITEM offset is the level, not a length */
			num_bytes = extent_root->fs_info->nodesize;
		} else {
			num_bytes = key.offset;
		}

		if (num_bytes == 0) {
			error("extent length 0 at bytenr %llu key type %d",
					(unsigned long long)bytenr, key.type);
			ret = -EIO;
			break;
		}

		/* modern extent item: flags tell us directly if it's a
		 * tree block */
		if (btrfs_item_size_nr(leaf, path->slots[0]) > sizeof(*ei)) {
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_extent_item);
			if (btrfs_extent_flags(leaf, ei) &
			    BTRFS_EXTENT_FLAG_TREE_BLOCK) {
				ret = add_extent(bytenr, num_bytes, metadump,
						 0);
				if (ret) {
					error("unable to add block %llu: %d",
						(unsigned long long)bytenr, ret);
					break;
				}
			}
		} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
			/* v0 item: inspect back refs to classify the extent */
			ret = is_tree_block(extent_root, path, bytenr);
			if (ret < 0) {
				error("failed to check tree block %llu: %d",
					(unsigned long long)bytenr, ret);
				break;
			}

			if (ret) {
				ret = add_extent(bytenr, num_bytes, metadump,
						 0);
				if (ret) {
					error("unable to add block %llu: %d",
						(unsigned long long)bytenr, ret);
					break;
				}
			}
			ret = 0;
#else
			error(
	"either extent tree is corrupted or you haven't built with V0 support");
			ret = -EIO;
			break;
#endif
		}
		bytenr += num_bytes;
	}

	btrfs_release_path(path);

	return ret;
}
1406
1407 static int create_metadump(const char *input, FILE *out, int num_threads,
1408                            int compress_level, int sanitize, int walk_trees)
1409 {
1410         struct btrfs_root *root;
1411         struct btrfs_path path;
1412         struct metadump_struct metadump;
1413         int ret;
1414         int err = 0;
1415
1416         root = open_ctree(input, 0, 0);
1417         if (!root) {
1418                 error("open ctree failed");
1419                 return -EIO;
1420         }
1421
1422         ret = metadump_init(&metadump, root, out, num_threads,
1423                             compress_level, sanitize);
1424         if (ret) {
1425                 error("failed to initialize metadump: %d", ret);
1426                 close_ctree(root);
1427                 return ret;
1428         }
1429
1430         ret = add_extent(BTRFS_SUPER_INFO_OFFSET, BTRFS_SUPER_INFO_SIZE,
1431                         &metadump, 0);
1432         if (ret) {
1433                 error("unable to add metadata: %d", ret);
1434                 err = ret;
1435                 goto out;
1436         }
1437
1438         btrfs_init_path(&path);
1439
1440         if (walk_trees) {
1441                 ret = copy_tree_blocks(root, root->fs_info->chunk_root->node,
1442                                        &metadump, 1);
1443                 if (ret) {
1444                         err = ret;
1445                         goto out;
1446                 }
1447
1448                 ret = copy_tree_blocks(root, root->fs_info->tree_root->node,
1449                                        &metadump, 1);
1450                 if (ret) {
1451                         err = ret;
1452                         goto out;
1453                 }
1454         } else {
1455                 ret = copy_from_extent_tree(&metadump, &path);
1456                 if (ret) {
1457                         err = ret;
1458                         goto out;
1459                 }
1460         }
1461
1462         ret = copy_log_trees(root, &metadump);
1463         if (ret) {
1464                 err = ret;
1465                 goto out;
1466         }
1467
1468         ret = copy_space_cache(root, &metadump, &path);
1469 out:
1470         ret = flush_pending(&metadump, 1);
1471         if (ret) {
1472                 if (!err)
1473                         err = ret;
1474                 error("failed to flush pending data: %d", ret);
1475         }
1476
1477         metadump_destroy(&metadump, num_threads);
1478
1479         btrfs_release_path(&path);
1480         ret = close_ctree(root);
1481         return err ? err : ret;
1482 }
1483
/*
 * Rewrite a restored (old format) super block for a single-device image:
 * replace the sys chunk array with one SYSTEM chunk of unlimited length
 * mapping 1:1 onto the single device, set the METADUMP flag, and
 * recompute the checksum.
 *
 * @buffer holds a BTRFS_SUPER_INFO_SIZE super block and is modified in
 * place.
 */
static void update_super_old(u8 *buffer)
{
	struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
	struct btrfs_chunk *chunk;
	struct btrfs_disk_key *key;
	u32 sectorsize = btrfs_super_sectorsize(super);
	u64 flags = btrfs_super_flags(super);

	flags |= BTRFS_SUPER_FLAG_METADUMP;
	btrfs_set_super_flags(super, flags);

	/* the array is laid out as disk_key followed by the chunk item */
	key = (struct btrfs_disk_key *)(super->sys_chunk_array);
	chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
				       sizeof(struct btrfs_disk_key));

	btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
	btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
	btrfs_set_disk_key_offset(key, 0);

	/* (u64)-1 length: a single chunk that covers everything */
	btrfs_set_stack_chunk_length(chunk, (u64)-1);
	btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
	btrfs_set_stack_chunk_stripe_len(chunk, BTRFS_STRIPE_LEN);
	btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
	btrfs_set_stack_chunk_io_align(chunk, sectorsize);
	btrfs_set_stack_chunk_io_width(chunk, sectorsize);
	btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
	btrfs_set_stack_chunk_num_stripes(chunk, 1);
	btrfs_set_stack_chunk_sub_stripes(chunk, 0);
	/* both fields are little-endian on disk: direct copy is fine */
	chunk->stripe.devid = super->dev_item.devid;
	btrfs_set_stack_stripe_offset(&chunk->stripe, 0);
	memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
	btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
	csum_block(buffer, BTRFS_SUPER_INFO_SIZE);
}
1518
/*
 * Rewrite a restored super block for a single target device by compacting
 * the sys_chunk_array in place: every chunk is reduced to one stripe
 * (DUP chunks keep two) on the image device, with stripe offsets
 * translated from the dump's logical->physical mapping.  Sets the
 * METADUMP_V2 flag, fixes array size and device count, and recomputes the
 * checksum.
 *
 * Returns 0 on success, -EIO when the sys array contains a non-chunk key.
 */
static int update_super(struct mdrestore_struct *mdres, u8 *buffer)
{
	struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
	struct btrfs_chunk *chunk;
	struct btrfs_disk_key *disk_key;
	struct btrfs_key key;
	u64 flags = btrfs_super_flags(super);
	u32 new_array_size = 0;
	u32 array_size;
	u32 cur = 0;
	u8 *ptr, *write_ptr;
	int old_num_stripes;

	/* ptr reads the old entries, write_ptr writes the compacted ones;
	 * write_ptr never overtakes ptr, so in-place is safe */
	write_ptr = ptr = super->sys_chunk_array;
	array_size = btrfs_super_sys_array_size(super);

	while (cur < array_size) {
		disk_key = (struct btrfs_disk_key *)ptr;
		btrfs_disk_key_to_cpu(&key, disk_key);

		new_array_size += sizeof(*disk_key);
		memmove(write_ptr, ptr, sizeof(*disk_key));

		write_ptr += sizeof(*disk_key);
		ptr += sizeof(*disk_key);
		cur += sizeof(*disk_key);

		if (key.type == BTRFS_CHUNK_ITEM_KEY) {
			u64 type, physical, physical_dup, size = 0;

			/* stripe count must be read before the move
			 * overwrites the source entry */
			chunk = (struct btrfs_chunk *)ptr;
			old_num_stripes = btrfs_stack_chunk_num_stripes(chunk);
			chunk = (struct btrfs_chunk *)write_ptr;

			/* sizeof(*chunk) covers the header plus one stripe */
			memmove(write_ptr, ptr, sizeof(*chunk));
			btrfs_set_stack_chunk_sub_stripes(chunk, 0);
			type = btrfs_stack_chunk_type(chunk);
			if (type & BTRFS_BLOCK_GROUP_DUP) {
				/* DUP keeps its second stripe */
				new_array_size += sizeof(struct btrfs_stripe);
				write_ptr += sizeof(struct btrfs_stripe);
			} else {
				/* everything else becomes 1-stripe SYSTEM */
				btrfs_set_stack_chunk_num_stripes(chunk, 1);
				btrfs_set_stack_chunk_type(chunk,
						BTRFS_BLOCK_GROUP_SYSTEM);
			}
			chunk->stripe.devid = super->dev_item.devid;
			physical = logical_to_physical(mdres, key.offset,
						       &size, &physical_dup);
			if (size != (u64)-1)
				btrfs_set_stack_stripe_offset(&chunk->stripe,
							      physical);
			memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid,
			       BTRFS_UUID_SIZE);
			new_array_size += sizeof(*chunk);
		} else {
			error("bogus key in the sys array %d", key.type);
			return -EIO;
		}
		write_ptr += sizeof(*chunk);
		/* advance the read side by the chunk's original size */
		ptr += btrfs_chunk_item_size(old_num_stripes);
		cur += btrfs_chunk_item_size(old_num_stripes);
	}

	if (mdres->clear_space_cache)
		btrfs_set_super_cache_generation(super, 0);

	flags |= BTRFS_SUPER_FLAG_METADUMP_V2;
	btrfs_set_super_flags(super, flags);
	btrfs_set_super_sys_array_size(super, new_array_size);
	btrfs_set_super_num_devices(super, 1);
	csum_block(buffer, BTRFS_SUPER_INFO_SIZE);

	return 0;
}
1593
1594 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size)
1595 {
1596         struct extent_buffer *eb;
1597
1598         eb = calloc(1, sizeof(struct extent_buffer) + size);
1599         if (!eb)
1600                 return NULL;
1601
1602         eb->start = bytenr;
1603         eb->len = size;
1604         return eb;
1605 }
1606
/*
 * Shrink the item at @slot in leaf @eb to @new_size bytes, shifting the
 * data of the item at @slot and everything after it toward the end of the
 * leaf and bumping those items' offsets by the freed amount.  In-memory
 * analogue of the kernel's btrfs_truncate_item() for dummy leaves.
 *
 * No-op when the item already has @new_size.  Assumes new_size <= old
 * size (size_diff is computed unsigned).
 */
static void truncate_item(struct extent_buffer *eb, int slot, u32 new_size)
{
	struct btrfs_item *item;
	u32 nritems;
	u32 old_size;
	u32 old_data_start;
	u32 size_diff;
	u32 data_end;
	int i;

	old_size = btrfs_item_size_nr(eb, slot);
	if (old_size == new_size)
		return;

	nritems = btrfs_header_nritems(eb);
	/* item data grows downward: last item has the lowest offset */
	data_end = btrfs_item_offset_nr(eb, nritems - 1);

	old_data_start = btrfs_item_offset_nr(eb, slot);
	size_diff = old_size - new_size;

	/* push offsets of slot..last up by the amount we free */
	for (i = slot; i < nritems; i++) {
		u32 ioff;
		item = btrfs_item_nr(i);
		ioff = btrfs_item_offset(eb, item);
		btrfs_set_item_offset(eb, item, ioff + size_diff);
	}

	/* move the data of slot..last toward the end of the leaf */
	memmove_extent_buffer(eb, btrfs_leaf_data(eb) + data_end + size_diff,
			      btrfs_leaf_data(eb) + data_end,
			      old_data_start + new_size - data_end);
	item = btrfs_item_nr(slot);
	btrfs_set_item_size(eb, item, new_size);
}
1640
1641 static int fixup_chunk_tree_block(struct mdrestore_struct *mdres,
1642                                   struct async_work *async, u8 *buffer,
1643                                   size_t size)
1644 {
1645         struct extent_buffer *eb;
1646         size_t size_left = size;
1647         u64 bytenr = async->start;
1648         int i;
1649
1650         if (size_left % mdres->nodesize)
1651                 return 0;
1652
1653         eb = alloc_dummy_eb(bytenr, mdres->nodesize);
1654         if (!eb)
1655                 return -ENOMEM;
1656
1657         while (size_left) {
1658                 eb->start = bytenr;
1659                 memcpy(eb->data, buffer, mdres->nodesize);
1660
1661                 if (btrfs_header_bytenr(eb) != bytenr)
1662                         break;
1663                 if (memcmp(mdres->fsid,
1664                            eb->data + offsetof(struct btrfs_header, fsid),
1665                            BTRFS_FSID_SIZE))
1666                         break;
1667
1668                 if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID)
1669                         goto next;
1670
1671                 if (btrfs_header_level(eb) != 0)
1672                         goto next;
1673
1674                 for (i = 0; i < btrfs_header_nritems(eb); i++) {
1675                         struct btrfs_chunk *chunk;
1676                         struct btrfs_key key;
1677                         u64 type, physical, physical_dup, size = (u64)-1;
1678
1679                         btrfs_item_key_to_cpu(eb, &key, i);
1680                         if (key.type != BTRFS_CHUNK_ITEM_KEY)
1681                                 continue;
1682
1683                         size = 0;
1684                         physical = logical_to_physical(mdres, key.offset,
1685                                                        &size, &physical_dup);
1686
1687                         if (!physical_dup)
1688                                 truncate_item(eb, i, sizeof(*chunk));
1689                         chunk = btrfs_item_ptr(eb, i, struct btrfs_chunk);
1690
1691
1692                         /* Zero out the RAID profile */
1693                         type = btrfs_chunk_type(eb, chunk);
1694                         type &= (BTRFS_BLOCK_GROUP_DATA |
1695                                  BTRFS_BLOCK_GROUP_SYSTEM |
1696                                  BTRFS_BLOCK_GROUP_METADATA |
1697                                  BTRFS_BLOCK_GROUP_DUP);
1698                         btrfs_set_chunk_type(eb, chunk, type);
1699
1700                         if (!physical_dup)
1701                                 btrfs_set_chunk_num_stripes(eb, chunk, 1);
1702                         btrfs_set_chunk_sub_stripes(eb, chunk, 0);
1703                         btrfs_set_stripe_devid_nr(eb, chunk, 0, mdres->devid);
1704                         if (size != (u64)-1)
1705                                 btrfs_set_stripe_offset_nr(eb, chunk, 0,
1706                                                            physical);
1707                         /* update stripe 2 offset */
1708                         if (physical_dup)
1709                                 btrfs_set_stripe_offset_nr(eb, chunk, 1,
1710                                                            physical_dup);
1711
1712                         write_extent_buffer(eb, mdres->uuid,
1713                                         (unsigned long)btrfs_stripe_dev_uuid_nr(
1714                                                 chunk, 0),
1715                                         BTRFS_UUID_SIZE);
1716                 }
1717                 memcpy(buffer, eb->data, eb->len);
1718                 csum_block(buffer, eb->len);
1719 next:
1720                 size_left -= mdres->nodesize;
1721                 buffer += mdres->nodesize;
1722                 bytenr += mdres->nodesize;
1723         }
1724
1725         free(eb);
1726         return 0;
1727 }
1728
1729 static void write_backup_supers(int fd, u8 *buf)
1730 {
1731         struct btrfs_super_block *super = (struct btrfs_super_block *)buf;
1732         struct stat st;
1733         u64 size;
1734         u64 bytenr;
1735         int i;
1736         int ret;
1737
1738         if (fstat(fd, &st)) {
1739                 error(
1740         "cannot stat restore point, won't be able to write backup supers: %s",
1741                         strerror(errno));
1742                 return;
1743         }
1744
1745         size = btrfs_device_size(fd, &st);
1746
1747         for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1748                 bytenr = btrfs_sb_offset(i);
1749                 if (bytenr + BTRFS_SUPER_INFO_SIZE > size)
1750                         break;
1751                 btrfs_set_super_bytenr(super, bytenr);
1752                 csum_block(buf, BTRFS_SUPER_INFO_SIZE);
1753                 ret = pwrite64(fd, buf, BTRFS_SUPER_INFO_SIZE, bytenr);
1754                 if (ret < BTRFS_SUPER_INFO_SIZE) {
1755                         if (ret < 0)
1756                                 error(
1757                                 "problem writing out backup super block %d: %s",
1758                                                 i, strerror(errno));
1759                         else
1760                                 error("short write writing out backup super block");
1761                         break;
1762                 }
1763         }
1764 }
1765
1766 static void *restore_worker(void *data)
1767 {
1768         struct mdrestore_struct *mdres = (struct mdrestore_struct *)data;
1769         struct async_work *async;
1770         size_t size;
1771         u8 *buffer;
1772         u8 *outbuf;
1773         int outfd;
1774         int ret;
1775         int compress_size = MAX_PENDING_SIZE * 4;
1776
1777         outfd = fileno(mdres->out);
1778         buffer = malloc(compress_size);
1779         if (!buffer) {
1780                 error("not enough memory for restore worker buffer");
1781                 pthread_mutex_lock(&mdres->mutex);
1782                 if (!mdres->error)
1783                         mdres->error = -ENOMEM;
1784                 pthread_mutex_unlock(&mdres->mutex);
1785                 pthread_exit(NULL);
1786         }
1787
1788         while (1) {
1789                 u64 bytenr, physical_dup;
1790                 off_t offset = 0;
1791                 int err = 0;
1792
1793                 pthread_mutex_lock(&mdres->mutex);
1794                 while (!mdres->nodesize || list_empty(&mdres->list)) {
1795                         if (mdres->done) {
1796                                 pthread_mutex_unlock(&mdres->mutex);
1797                                 goto out;
1798                         }
1799                         pthread_cond_wait(&mdres->cond, &mdres->mutex);
1800                 }
1801                 async = list_entry(mdres->list.next, struct async_work, list);
1802                 list_del_init(&async->list);
1803
1804                 if (mdres->compress_method == COMPRESS_ZLIB) {
1805                         size = compress_size; 
1806                         pthread_mutex_unlock(&mdres->mutex);
1807                         ret = uncompress(buffer, (unsigned long *)&size,
1808                                          async->buffer, async->bufsize);
1809                         pthread_mutex_lock(&mdres->mutex);
1810                         if (ret != Z_OK) {
1811                                 error("decompression failed with %d", ret);
1812                                 err = -EIO;
1813                         }
1814                         outbuf = buffer;
1815                 } else {
1816                         outbuf = async->buffer;
1817                         size = async->bufsize;
1818                 }
1819
1820                 if (!mdres->multi_devices) {
1821                         if (async->start == BTRFS_SUPER_INFO_OFFSET) {
1822                                 if (mdres->old_restore) {
1823                                         update_super_old(outbuf);
1824                                 } else {
1825                                         ret = update_super(mdres, outbuf);
1826                                         if (ret)
1827                                                 err = ret;
1828                                 }
1829                         } else if (!mdres->old_restore) {
1830                                 ret = fixup_chunk_tree_block(mdres, async, outbuf, size);
1831                                 if (ret)
1832                                         err = ret;
1833                         }
1834                 }
1835
1836                 if (!mdres->fixup_offset) {
1837                         while (size) {
1838                                 u64 chunk_size = size;
1839                                 physical_dup = 0;
1840                                 if (!mdres->multi_devices && !mdres->old_restore)
1841                                         bytenr = logical_to_physical(mdres,
1842                                                      async->start + offset,
1843                                                      &chunk_size,
1844                                                      &physical_dup);
1845                                 else
1846                                         bytenr = async->start + offset;
1847
1848                                 ret = pwrite64(outfd, outbuf+offset, chunk_size,
1849                                                bytenr);
1850                                 if (ret != chunk_size)
1851                                         goto error;
1852
1853                                 if (physical_dup)
1854                                         ret = pwrite64(outfd, outbuf+offset,
1855                                                        chunk_size,
1856                                                        physical_dup);
1857                                 if (ret != chunk_size)
1858                                         goto error;
1859
1860                                 size -= chunk_size;
1861                                 offset += chunk_size;
1862                                 continue;
1863
1864 error:
1865                                 if (ret < 0) {
1866                                         error("unable to write to device: %s",
1867                                                         strerror(errno));
1868                                         err = errno;
1869                                 } else {
1870                                         error("short write");
1871                                         err = -EIO;
1872                                 }
1873                         }
1874                 } else if (async->start != BTRFS_SUPER_INFO_OFFSET) {
1875                         ret = write_data_to_disk(mdres->info, outbuf, async->start, size, 0);
1876                         if (ret) {
1877                                 error("failed to write data");
1878                                 exit(1);
1879                         }
1880                 }
1881
1882
1883                 /* backup super blocks are already there at fixup_offset stage */
1884                 if (!mdres->multi_devices && async->start == BTRFS_SUPER_INFO_OFFSET)
1885                         write_backup_supers(outfd, outbuf);
1886
1887                 if (err && !mdres->error)
1888                         mdres->error = err;
1889                 mdres->num_items--;
1890                 pthread_mutex_unlock(&mdres->mutex);
1891
1892                 free(async->buffer);
1893                 free(async);
1894         }
1895 out:
1896         free(buffer);
1897         pthread_exit(NULL);
1898 }
1899
1900 static void mdrestore_destroy(struct mdrestore_struct *mdres, int num_threads)
1901 {
1902         struct rb_node *n;
1903         int i;
1904
1905         while ((n = rb_first(&mdres->chunk_tree))) {
1906                 struct fs_chunk *entry;
1907
1908                 entry = rb_entry(n, struct fs_chunk, l);
1909                 rb_erase(n, &mdres->chunk_tree);
1910                 rb_erase(&entry->p, &mdres->physical_tree);
1911                 free(entry);
1912         }
1913         pthread_mutex_lock(&mdres->mutex);
1914         mdres->done = 1;
1915         pthread_cond_broadcast(&mdres->cond);
1916         pthread_mutex_unlock(&mdres->mutex);
1917
1918         for (i = 0; i < num_threads; i++)
1919                 pthread_join(mdres->threads[i], NULL);
1920
1921         pthread_cond_destroy(&mdres->cond);
1922         pthread_mutex_destroy(&mdres->mutex);
1923 }
1924
1925 static int mdrestore_init(struct mdrestore_struct *mdres,
1926                           FILE *in, FILE *out, int old_restore,
1927                           int num_threads, int fixup_offset,
1928                           struct btrfs_fs_info *info, int multi_devices)
1929 {
1930         int i, ret = 0;
1931
1932         memset(mdres, 0, sizeof(*mdres));
1933         pthread_cond_init(&mdres->cond, NULL);
1934         pthread_mutex_init(&mdres->mutex, NULL);
1935         INIT_LIST_HEAD(&mdres->list);
1936         INIT_LIST_HEAD(&mdres->overlapping_chunks);
1937         mdres->in = in;
1938         mdres->out = out;
1939         mdres->old_restore = old_restore;
1940         mdres->chunk_tree.rb_node = NULL;
1941         mdres->fixup_offset = fixup_offset;
1942         mdres->info = info;
1943         mdres->multi_devices = multi_devices;
1944         mdres->clear_space_cache = 0;
1945         mdres->last_physical_offset = 0;
1946         mdres->alloced_chunks = 0;
1947
1948         if (!num_threads)
1949                 return 0;
1950
1951         mdres->num_threads = num_threads;
1952         for (i = 0; i < num_threads; i++) {
1953                 ret = pthread_create(&mdres->threads[i], NULL, restore_worker,
1954                                      mdres);
1955                 if (ret) {
1956                         /* pthread_create returns errno directly */
1957                         ret = -ret;
1958                         break;
1959                 }
1960         }
1961         if (ret)
1962                 mdrestore_destroy(mdres, i + 1);
1963         return ret;
1964 }
1965
1966 static int fill_mdres_info(struct mdrestore_struct *mdres,
1967                            struct async_work *async)
1968 {
1969         struct btrfs_super_block *super;
1970         u8 *buffer = NULL;
1971         u8 *outbuf;
1972         int ret;
1973
1974         /* We've already been initialized */
1975         if (mdres->nodesize)
1976                 return 0;
1977
1978         if (mdres->compress_method == COMPRESS_ZLIB) {
1979                 size_t size = MAX_PENDING_SIZE * 2;
1980
1981                 buffer = malloc(MAX_PENDING_SIZE * 2);
1982                 if (!buffer)
1983                         return -ENOMEM;
1984                 ret = uncompress(buffer, (unsigned long *)&size,
1985                                  async->buffer, async->bufsize);
1986                 if (ret != Z_OK) {
1987                         error("decompression failed with %d", ret);
1988                         free(buffer);
1989                         return -EIO;
1990                 }
1991                 outbuf = buffer;
1992         } else {
1993                 outbuf = async->buffer;
1994         }
1995
1996         super = (struct btrfs_super_block *)outbuf;
1997         mdres->nodesize = btrfs_super_nodesize(super);
1998         memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
1999         memcpy(mdres->uuid, super->dev_item.uuid,
2000                        BTRFS_UUID_SIZE);
2001         mdres->devid = le64_to_cpu(super->dev_item.devid);
2002         free(buffer);
2003         return 0;
2004 }
2005
/*
 * Read all items of one cluster from mdres->in and queue them as
 * async_work entries for the restore worker threads.
 *
 * @cluster: cluster whose header was already read from the stream
 * @next:    out parameter, set to the stream offset of the next cluster
 *
 * The stream position must be just past the cluster header block; item
 * payloads follow back to back and the cluster is padded out to
 * BLOCK_SIZE.  Returns 0 on success or a negative errno.
 */
static int add_cluster(struct meta_cluster *cluster,
                       struct mdrestore_struct *mdres, u64 *next)
{
        struct meta_cluster_item *item;
        struct meta_cluster_header *header = &cluster->header;
        struct async_work *async;
        u64 bytenr;
        u32 i, nritems;
        int ret;

        /* Workers read compress_method; publish it under the lock */
        pthread_mutex_lock(&mdres->mutex);
        mdres->compress_method = header->compress;
        pthread_mutex_unlock(&mdres->mutex);

        /* Item data starts right after the header block */
        bytenr = le64_to_cpu(header->bytenr) + BLOCK_SIZE;
        nritems = le32_to_cpu(header->nritems);
        for (i = 0; i < nritems; i++) {
                item = &cluster->items[i];
                async = calloc(1, sizeof(*async));
                if (!async) {
                        error("not enough memory for async data");
                        return -ENOMEM;
                }
                async->start = le64_to_cpu(item->bytenr);
                async->bufsize = le32_to_cpu(item->size);
                async->buffer = malloc(async->bufsize);
                if (!async->buffer) {
                        error("not enough memory for async buffer");
                        free(async);
                        return -ENOMEM;
                }
                /* NOTE(review): fread() is not guaranteed to set errno;
                 * the strerror(errno) here may print a stale error. */
                ret = fread(async->buffer, async->bufsize, 1, mdres->in);
                if (ret != 1) {
                        error("unable to read buffer: %s", strerror(errno));
                        free(async->buffer);
                        free(async);
                        return -EIO;
                }
                bytenr += async->bufsize;

                pthread_mutex_lock(&mdres->mutex);
                /*
                 * The super block item bootstraps the restore state
                 * (nodesize etc.) that workers wait on before running.
                 */
                if (async->start == BTRFS_SUPER_INFO_OFFSET) {
                        ret = fill_mdres_info(mdres, async);
                        if (ret) {
                                error("unable to set up restore state");
                                pthread_mutex_unlock(&mdres->mutex);
                                free(async->buffer);
                                free(async);
                                return ret;
                        }
                }
                list_add_tail(&async->list, &mdres->list);
                mdres->num_items++;
                pthread_cond_signal(&mdres->cond);
                pthread_mutex_unlock(&mdres->mutex);
        }
        /* Consume the padding up to the next BLOCK_SIZE boundary */
        if (bytenr & BLOCK_MASK) {
                char buffer[BLOCK_MASK];
                size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);

                bytenr += size;
                ret = fread(buffer, size, 1, mdres->in);
                if (ret != 1) {
                        error("failed to read buffer: %s", strerror(errno));
                        return -EIO;
                }
        }
        *next = bytenr;
        return 0;
}
2076
2077 static int wait_for_worker(struct mdrestore_struct *mdres)
2078 {
2079         int ret = 0;
2080
2081         pthread_mutex_lock(&mdres->mutex);
2082         ret = mdres->error;
2083         while (!ret && mdres->num_items > 0) {
2084                 struct timespec ts = {
2085                         .tv_sec = 0,
2086                         .tv_nsec = 10000000,
2087                 };
2088                 pthread_mutex_unlock(&mdres->mutex);
2089                 nanosleep(&ts, NULL);
2090                 pthread_mutex_lock(&mdres->mutex);
2091                 ret = mdres->error;
2092         }
2093         pthread_mutex_unlock(&mdres->mutex);
2094         return ret;
2095 }
2096
/*
 * Parse the chunk tree block at logical @bytenr out of @buffer (one
 * decompressed cluster item that starts at logical @item_bytenr) and
 * record every chunk item found in mdres' mapping trees.
 *
 * Interior nodes recurse through search_for_chunk_blocks() (mutual
 * recursion) to locate and parse their children elsewhere in the
 * image; leaves have their chunk items inserted into chunk_tree
 * (logical->physical) and physical_tree, with DUP second stripes and
 * the running last_physical_offset / alloced_chunks totals tracked.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -EIO when the
 * block fails sanity checks.
 */
static int read_chunk_block(struct mdrestore_struct *mdres, u8 *buffer,
                            u64 bytenr, u64 item_bytenr, u32 bufsize,
                            u64 cluster_bytenr)
{
        struct extent_buffer *eb;
        int ret = 0;
        int i;

        eb = alloc_dummy_eb(bytenr, mdres->nodesize);
        if (!eb) {
                ret = -ENOMEM;
                goto out;
        }

        /* Advance to the tree block we want inside the item buffer */
        while (item_bytenr != bytenr) {
                buffer += mdres->nodesize;
                item_bytenr += mdres->nodesize;
        }

        memcpy(eb->data, buffer, mdres->nodesize);
        /* Sanity checks: right block, right filesystem, right tree */
        if (btrfs_header_bytenr(eb) != bytenr) {
                error("eb bytenr does not match found bytenr: %llu != %llu",
                                (unsigned long long)btrfs_header_bytenr(eb),
                                (unsigned long long)bytenr);
                ret = -EIO;
                goto out;
        }

        if (memcmp(mdres->fsid, eb->data + offsetof(struct btrfs_header, fsid),
                   BTRFS_FSID_SIZE)) {
                error("filesystem UUID of eb %llu does not match",
                                (unsigned long long)bytenr);
                ret = -EIO;
                goto out;
        }

        if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID) {
                error("wrong eb %llu owner %llu",
                                (unsigned long long)bytenr,
                                (unsigned long long)btrfs_header_owner(eb));
                ret = -EIO;
                goto out;
        }

        for (i = 0; i < btrfs_header_nritems(eb); i++) {
                struct btrfs_chunk *chunk;
                struct fs_chunk *fs_chunk;
                struct btrfs_key key;
                u64 type;

                /* Interior node: chase each child block pointer */
                if (btrfs_header_level(eb)) {
                        u64 blockptr = btrfs_node_blockptr(eb, i);

                        ret = search_for_chunk_blocks(mdres, blockptr,
                                                      cluster_bytenr);
                        if (ret)
                                break;
                        continue;
                }

                /* Yay a leaf!  We loves leafs! */
                btrfs_item_key_to_cpu(eb, &key, i);
                if (key.type != BTRFS_CHUNK_ITEM_KEY)
                        continue;

                fs_chunk = malloc(sizeof(struct fs_chunk));
                if (!fs_chunk) {
                        error("not enough memory to allocate chunk");
                        ret = -ENOMEM;
                        break;
                }
                memset(fs_chunk, 0, sizeof(*fs_chunk));
                chunk = btrfs_item_ptr(eb, i, struct btrfs_chunk);

                fs_chunk->logical = key.offset;
                fs_chunk->physical = btrfs_stripe_offset_nr(eb, chunk, 0);
                fs_chunk->bytes = btrfs_chunk_length(eb, chunk);
                INIT_LIST_HEAD(&fs_chunk->list);
                /*
                 * Chunks whose physical range collides with an already
                 * seen chunk are kept on a separate list instead of the
                 * physical tree.
                 */
                if (tree_search(&mdres->physical_tree, &fs_chunk->p,
                                physical_cmp, 1) != NULL)
                        list_add(&fs_chunk->list, &mdres->overlapping_chunks);
                else
                        tree_insert(&mdres->physical_tree, &fs_chunk->p,
                                    physical_cmp);

                type = btrfs_chunk_type(eb, chunk);
                if (type & BTRFS_BLOCK_GROUP_DUP) {
                        fs_chunk->physical_dup =
                                        btrfs_stripe_offset_nr(eb, chunk, 1);
                }

                /* Track the highest physical byte any stripe reaches */
                if (fs_chunk->physical_dup + fs_chunk->bytes >
                    mdres->last_physical_offset)
                        mdres->last_physical_offset = fs_chunk->physical_dup +
                                fs_chunk->bytes;
                else if (fs_chunk->physical + fs_chunk->bytes >
                    mdres->last_physical_offset)
                        mdres->last_physical_offset = fs_chunk->physical +
                                fs_chunk->bytes;
                mdres->alloced_chunks += fs_chunk->bytes;
                /* in dup case, fs_chunk->bytes should add twice */
                if (fs_chunk->physical_dup)
                        mdres->alloced_chunks += fs_chunk->bytes;
                tree_insert(&mdres->chunk_tree, &fs_chunk->l, chunk_cmp);
        }
out:
        free(eb);
        return ret;
}
2206
/* If you have to ask you aren't worthy */
/*
 * Scan the image stream for the cluster item that contains the chunk
 * tree block at logical address @search, then hand it to
 * read_chunk_block() (which may recurse back into this function for
 * interior nodes).
 *
 * The scan starts at stream offset @cluster_bytenr and walks forward
 * cluster by cluster; if the end of the stream is reached it restarts
 * once from offset 0 before giving up.
 *
 * Returns 0 on success (block found and parsed), negative errno on
 * failure.
 */
static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
                                   u64 search, u64 cluster_bytenr)
{
        struct meta_cluster *cluster;
        struct meta_cluster_header *header;
        struct meta_cluster_item *item;
        u64 current_cluster = cluster_bytenr, bytenr;
        u64 item_bytenr;
        u32 bufsize, nritems, i;
        u32 max_size = MAX_PENDING_SIZE * 2;
        u8 *buffer, *tmp = NULL;
        int ret = 0;

        cluster = malloc(BLOCK_SIZE);
        if (!cluster) {
                error("not enough memory for cluster");
                return -ENOMEM;
        }

        buffer = malloc(max_size);
        if (!buffer) {
                error("not enough memory for buffer");
                free(cluster);
                return -ENOMEM;
        }

        /* Scratch buffer for the compressed item data, zlib only */
        if (mdres->compress_method == COMPRESS_ZLIB) {
                tmp = malloc(max_size);
                if (!tmp) {
                        error("not enough memory for buffer");
                        free(cluster);
                        free(buffer);
                        return -ENOMEM;
                }
        }

        bytenr = current_cluster;
        while (1) {
                if (fseek(mdres->in, current_cluster, SEEK_SET)) {
                        error("seek failed: %s", strerror(errno));
                        ret = -EIO;
                        break;
                }

                ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
                if (ret == 0) {
                        /*
                         * Hit EOF: wrap around once and rescan from the
                         * start of the image before declaring failure.
                         */
                        if (cluster_bytenr != 0) {
                                cluster_bytenr = 0;
                                current_cluster = 0;
                                bytenr = 0;
                                continue;
                        }
                        error(
        "unknown state after reading cluster at %llu, probably corrupted data",
                                        cluster_bytenr);
                        ret = -EIO;
                        break;
                } else if (ret < 0) {
                        error("unable to read image at %llu: %s",
                                        (unsigned long long)cluster_bytenr,
                                        strerror(errno));
                        break;
                }
                ret = 0;

                header = &cluster->header;
                if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
                    le64_to_cpu(header->bytenr) != current_cluster) {
                        error("bad header in metadump image");
                        ret = -EIO;
                        break;
                }

                /* Walk the items of this cluster looking for @search */
                bytenr += BLOCK_SIZE;
                nritems = le32_to_cpu(header->nritems);
                for (i = 0; i < nritems; i++) {
                        size_t size;

                        item = &cluster->items[i];
                        bufsize = le32_to_cpu(item->size);
                        item_bytenr = le64_to_cpu(item->bytenr);

                        if (bufsize > max_size) {
                                error("item %u too big: %u > %u", i, bufsize,
                                                max_size);
                                ret = -EIO;
                                break;
                        }

                        if (mdres->compress_method == COMPRESS_ZLIB) {
                                ret = fread(tmp, bufsize, 1, mdres->in);
                                if (ret != 1) {
                                        error("read error: %s", strerror(errno));
                                        ret = -EIO;
                                        break;
                                }

                                size = max_size;
                                ret = uncompress(buffer,
                                                 (unsigned long *)&size, tmp,
                                                 bufsize);
                                if (ret != Z_OK) {
                                        error("decompression failed with %d",
                                                        ret);
                                        ret = -EIO;
                                        break;
                                }
                        } else {
                                ret = fread(buffer, bufsize, 1, mdres->in);
                                if (ret != 1) {
                                        error("read error: %s",
                                                        strerror(errno));
                                        ret = -EIO;
                                        break;
                                }
                                size = bufsize;
                        }
                        ret = 0;

                        /* Does this item's logical range cover @search? */
                        if (item_bytenr <= search &&
                            item_bytenr + size > search) {
                                ret = read_chunk_block(mdres, buffer, search,
                                                       item_bytenr, size,
                                                       current_cluster);
                                if (!ret)
                                        ret = 1;    /* found: terminate scan */
                                break;
                        }
                        bytenr += bufsize;
                }
                if (ret) {
                        /* ret == 1 means success, translate back to 0 */
                        if (ret > 0)
                                ret = 0;
                        break;
                }
                /* Clusters are padded to BLOCK_SIZE in the stream */
                if (bytenr & BLOCK_MASK)
                        bytenr += BLOCK_SIZE - (bytenr & BLOCK_MASK);
                current_cluster = bytenr;
        }

        free(tmp);
        free(buffer);
        free(cluster);
        return ret;
}
2353
/*
 * Read the first cluster of the image, locate the superblock item inside it
 * and use the superblock to seed mdres (fsid, devid, nodesize), then scan
 * the image for all chunk tree blocks starting from the chunk root.
 *
 * @mdres:   restore state; compress_method, fsid, uuid, devid and nodesize
 *           are filled in under mdres->mutex
 * @cluster: caller-provided BLOCK_SIZE buffer the first cluster is read into
 *
 * Returns 0 on success (and unconditionally when reading from stdin, where
 * we cannot seek and this pre-pass is skipped), negative errno on failure.
 */
static int build_chunk_tree(struct mdrestore_struct *mdres,
			    struct meta_cluster *cluster)
{
	struct btrfs_super_block *super;
	struct meta_cluster_header *header;
	struct meta_cluster_item *item = NULL;
	u64 chunk_root_bytenr = 0;
	u32 i, nritems;
	u64 bytenr = 0;
	u8 *buffer;
	int ret;

	/* We can't seek with stdin so don't bother doing this */
	if (mdres->in == stdin)
		return 0;

	ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
	if (ret <= 0) {
		/* NOTE(review): fread does not reliably set errno on EOF */
		error("unable to read cluster: %s", strerror(errno));
		return -EIO;
	}
	ret = 0;

	header = &cluster->header;
	if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
	    le64_to_cpu(header->bytenr) != 0) {
		error("bad header in metadump image");
		return -EIO;
	}

	bytenr += BLOCK_SIZE;
	mdres->compress_method = header->compress;
	nritems = le32_to_cpu(header->nritems);

	/* Skip forward to the item that holds the superblock copy */
	for (i = 0; i < nritems; i++) {
		item = &cluster->items[i];

		if (le64_to_cpu(item->bytenr) == BTRFS_SUPER_INFO_OFFSET)
			break;
		bytenr += le32_to_cpu(item->size);
		if (fseek(mdres->in, le32_to_cpu(item->size), SEEK_CUR)) {
			error("seek failed: %s", strerror(errno));
			return -EIO;
		}
	}

	if (!item || le64_to_cpu(item->bytenr) != BTRFS_SUPER_INFO_OFFSET) {
		/*
		 * Do not dereference item here: it is NULL when the cluster
		 * has no items at all.
		 */
		error("did not find superblock at %llu",
				(unsigned long long)BTRFS_SUPER_INFO_OFFSET);
		return -EINVAL;
	}

	buffer = malloc(le32_to_cpu(item->size));
	if (!buffer) {
		error("not enough memory to allocate buffer");
		return -ENOMEM;
	}

	ret = fread(buffer, le32_to_cpu(item->size), 1, mdres->in);
	if (ret != 1) {
		error("unable to read buffer: %s", strerror(errno));
		free(buffer);
		return -EIO;
	}

	if (mdres->compress_method == COMPRESS_ZLIB) {
		/* uncompress() wants an unsigned long, not a size_t */
		unsigned long size = MAX_PENDING_SIZE * 2;
		u8 *tmp;

		tmp = malloc(MAX_PENDING_SIZE * 2);
		if (!tmp) {
			free(buffer);
			return -ENOMEM;
		}
		ret = uncompress(tmp, &size,
				 buffer, le32_to_cpu(item->size));
		if (ret != Z_OK) {
			error("decompression failed with %d", ret);
			free(buffer);
			free(tmp);
			return -EIO;
		}
		free(buffer);
		buffer = tmp;
	}

	pthread_mutex_lock(&mdres->mutex);
	super = (struct btrfs_super_block *)buffer;
	chunk_root_bytenr = btrfs_super_chunk_root(super);
	mdres->nodesize = btrfs_super_nodesize(super);
	memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
	memcpy(mdres->uuid, super->dev_item.uuid,
		       BTRFS_UUID_SIZE);
	mdres->devid = le64_to_cpu(super->dev_item.devid);
	free(buffer);
	pthread_mutex_unlock(&mdres->mutex);

	return search_for_chunk_blocks(mdres, chunk_root_bytenr, 0);
}
2452
2453 static int range_contains_super(u64 physical, u64 bytes)
2454 {
2455         u64 super_bytenr;
2456         int i;
2457
2458         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2459                 super_bytenr = btrfs_sb_offset(i);
2460                 if (super_bytenr >= physical &&
2461                     super_bytenr < physical + bytes)
2462                         return 1;
2463         }
2464
2465         return 0;
2466 }
2467
2468 static void remap_overlapping_chunks(struct mdrestore_struct *mdres)
2469 {
2470         struct fs_chunk *fs_chunk;
2471
2472         while (!list_empty(&mdres->overlapping_chunks)) {
2473                 fs_chunk = list_first_entry(&mdres->overlapping_chunks,
2474                                             struct fs_chunk, list);
2475                 list_del_init(&fs_chunk->list);
2476                 if (range_contains_super(fs_chunk->physical,
2477                                          fs_chunk->bytes)) {
2478                         warning(
2479 "remapping a chunk that had a super mirror inside of it, clearing space cache so we don't end up with corruption");
2480                         mdres->clear_space_cache = 1;
2481                 }
2482                 fs_chunk->physical = mdres->last_physical_offset;
2483                 tree_insert(&mdres->physical_tree, &fs_chunk->p, physical_cmp);
2484                 mdres->last_physical_offset += fs_chunk->bytes;
2485         }
2486 }
2487
2488 static int fixup_devices(struct btrfs_fs_info *fs_info,
2489                          struct mdrestore_struct *mdres, off_t dev_size)
2490 {
2491         struct btrfs_trans_handle *trans;
2492         struct btrfs_dev_item *dev_item;
2493         struct btrfs_path path;
2494         struct extent_buffer *leaf;
2495         struct btrfs_root *root = fs_info->chunk_root;
2496         struct btrfs_key key;
2497         u64 devid, cur_devid;
2498         int ret;
2499
2500         trans = btrfs_start_transaction(fs_info->tree_root, 1);
2501         if (IS_ERR(trans)) {
2502                 error("cannot starting transaction %ld", PTR_ERR(trans));
2503                 return PTR_ERR(trans);
2504         }
2505
2506         dev_item = &fs_info->super_copy->dev_item;
2507
2508         devid = btrfs_stack_device_id(dev_item);
2509
2510         btrfs_set_stack_device_total_bytes(dev_item, dev_size);
2511         btrfs_set_stack_device_bytes_used(dev_item, mdres->alloced_chunks);
2512
2513         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2514         key.type = BTRFS_DEV_ITEM_KEY;
2515         key.offset = 0;
2516
2517         btrfs_init_path(&path);
2518
2519 again:
2520         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
2521         if (ret < 0) {
2522                 error("search failed: %d", ret);
2523                 exit(1);
2524         }
2525
2526         while (1) {
2527                 leaf = path.nodes[0];
2528                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2529                         ret = btrfs_next_leaf(root, &path);
2530                         if (ret < 0) {
2531                                 error("cannot go to next leaf %d", ret);
2532                                 exit(1);
2533                         }
2534                         if (ret > 0) {
2535                                 ret = 0;
2536                                 break;
2537                         }
2538                         leaf = path.nodes[0];
2539                 }
2540
2541                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2542                 if (key.type > BTRFS_DEV_ITEM_KEY)
2543                         break;
2544                 if (key.type != BTRFS_DEV_ITEM_KEY) {
2545                         path.slots[0]++;
2546                         continue;
2547                 }
2548
2549                 dev_item = btrfs_item_ptr(leaf, path.slots[0],
2550                                           struct btrfs_dev_item);
2551                 cur_devid = btrfs_device_id(leaf, dev_item);
2552                 if (devid != cur_devid) {
2553                         ret = btrfs_del_item(trans, root, &path);
2554                         if (ret) {
2555                                 error("cannot delete item: %d", ret);
2556                                 exit(1);
2557                         }
2558                         btrfs_release_path(&path);
2559                         goto again;
2560                 }
2561
2562                 btrfs_set_device_total_bytes(leaf, dev_item, dev_size);
2563                 btrfs_set_device_bytes_used(leaf, dev_item,
2564                                             mdres->alloced_chunks);
2565                 btrfs_mark_buffer_dirty(leaf);
2566                 path.slots[0]++;
2567         }
2568
2569         btrfs_release_path(&path);
2570         ret = btrfs_commit_transaction(trans, fs_info->tree_root);
2571         if (ret) {
2572                 error("unable to commit transaction: %d", ret);
2573                 return ret;
2574         }
2575         return 0;
2576 }
2577
2578 static int restore_metadump(const char *input, FILE *out, int old_restore,
2579                             int num_threads, int fixup_offset,
2580                             const char *target, int multi_devices)
2581 {
2582         struct meta_cluster *cluster = NULL;
2583         struct meta_cluster_header *header;
2584         struct mdrestore_struct mdrestore;
2585         struct btrfs_fs_info *info = NULL;
2586         u64 bytenr = 0;
2587         FILE *in = NULL;
2588         int ret = 0;
2589
2590         if (!strcmp(input, "-")) {
2591                 in = stdin;
2592         } else {
2593                 in = fopen(input, "r");
2594                 if (!in) {
2595                         error("unable to open metadump image: %s",
2596                                         strerror(errno));
2597                         return 1;
2598                 }
2599         }
2600
2601         /* NOTE: open with write mode */
2602         if (fixup_offset) {
2603                 info = open_ctree_fs_info(target, 0, 0, 0,
2604                                           OPEN_CTREE_WRITES |
2605                                           OPEN_CTREE_RESTORE |
2606                                           OPEN_CTREE_PARTIAL);
2607                 if (!info) {
2608                         error("open ctree failed");
2609                         ret = -EIO;
2610                         goto failed_open;
2611                 }
2612         }
2613
2614         cluster = malloc(BLOCK_SIZE);
2615         if (!cluster) {
2616                 error("not enough memory for cluster");
2617                 ret = -ENOMEM;
2618                 goto failed_info;
2619         }
2620
2621         ret = mdrestore_init(&mdrestore, in, out, old_restore, num_threads,
2622                              fixup_offset, info, multi_devices);
2623         if (ret) {
2624                 error("failed to initialize metadata restore state: %d", ret);
2625                 goto failed_cluster;
2626         }
2627
2628         if (!multi_devices && !old_restore) {
2629                 ret = build_chunk_tree(&mdrestore, cluster);
2630                 if (ret)
2631                         goto out;
2632                 if (!list_empty(&mdrestore.overlapping_chunks))
2633                         remap_overlapping_chunks(&mdrestore);
2634         }
2635
2636         if (in != stdin && fseek(in, 0, SEEK_SET)) {
2637                 error("seek failed: %s", strerror(errno));
2638                 goto out;
2639         }
2640
2641         while (!mdrestore.error) {
2642                 ret = fread(cluster, BLOCK_SIZE, 1, in);
2643                 if (!ret)
2644                         break;
2645
2646                 header = &cluster->header;
2647                 if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
2648                     le64_to_cpu(header->bytenr) != bytenr) {
2649                         error("bad header in metadump image");
2650                         ret = -EIO;
2651                         break;
2652                 }
2653                 ret = add_cluster(cluster, &mdrestore, &bytenr);
2654                 if (ret) {
2655                         error("failed to add cluster: %d", ret);
2656                         break;
2657                 }
2658         }
2659         ret = wait_for_worker(&mdrestore);
2660
2661         if (!ret && !multi_devices && !old_restore) {
2662                 struct btrfs_root *root;
2663                 struct stat st;
2664
2665                 root = open_ctree_fd(fileno(out), target, 0,
2666                                           OPEN_CTREE_PARTIAL |
2667                                           OPEN_CTREE_WRITES |
2668                                           OPEN_CTREE_NO_DEVICES);
2669                 if (!root) {
2670                         error("open ctree failed in %s", target);
2671                         ret = -EIO;
2672                         goto out;
2673                 }
2674                 info = root->fs_info;
2675
2676                 if (stat(target, &st)) {
2677                         error("stat %s failed: %s", target, strerror(errno));
2678                         close_ctree(info->chunk_root);
2679                         free(cluster);
2680                         return 1;
2681                 }
2682
2683                 ret = fixup_devices(info, &mdrestore, st.st_size);
2684                 close_ctree(info->chunk_root);
2685                 if (ret)
2686                         goto out;
2687         }
2688 out:
2689         mdrestore_destroy(&mdrestore, num_threads);
2690 failed_cluster:
2691         free(cluster);
2692 failed_info:
2693         if (fixup_offset && info)
2694                 close_ctree(info->chunk_root);
2695 failed_open:
2696         if (in != stdin)
2697                 fclose(in);
2698         return ret;
2699 }
2700
2701 static int update_disk_super_on_device(struct btrfs_fs_info *info,
2702                                        const char *other_dev, u64 cur_devid)
2703 {
2704         struct btrfs_key key;
2705         struct extent_buffer *leaf;
2706         struct btrfs_path path;
2707         struct btrfs_dev_item *dev_item;
2708         struct btrfs_super_block *disk_super;
2709         char dev_uuid[BTRFS_UUID_SIZE];
2710         char fs_uuid[BTRFS_UUID_SIZE];
2711         u64 devid, type, io_align, io_width;
2712         u64 sector_size, total_bytes, bytes_used;
2713         char buf[BTRFS_SUPER_INFO_SIZE];
2714         int fp = -1;
2715         int ret;
2716
2717         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2718         key.type = BTRFS_DEV_ITEM_KEY;
2719         key.offset = cur_devid;
2720
2721         btrfs_init_path(&path);
2722         ret = btrfs_search_slot(NULL, info->chunk_root, &key, &path, 0, 0); 
2723         if (ret) {
2724                 error("search key failed: %d", ret);
2725                 ret = -EIO;
2726                 goto out;
2727         }
2728
2729         leaf = path.nodes[0];
2730         dev_item = btrfs_item_ptr(leaf, path.slots[0],
2731                                   struct btrfs_dev_item);
2732
2733         devid = btrfs_device_id(leaf, dev_item);
2734         if (devid != cur_devid) {
2735                 error("devid mismatch: %llu != %llu",
2736                                 (unsigned long long)devid,
2737                                 (unsigned long long)cur_devid);
2738                 ret = -EIO;
2739                 goto out;
2740         }
2741
2742         type = btrfs_device_type(leaf, dev_item);
2743         io_align = btrfs_device_io_align(leaf, dev_item);
2744         io_width = btrfs_device_io_width(leaf, dev_item);
2745         sector_size = btrfs_device_sector_size(leaf, dev_item);
2746         total_bytes = btrfs_device_total_bytes(leaf, dev_item);
2747         bytes_used = btrfs_device_bytes_used(leaf, dev_item);
2748         read_extent_buffer(leaf, dev_uuid, (unsigned long)btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE);
2749         read_extent_buffer(leaf, fs_uuid, (unsigned long)btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE);
2750
2751         btrfs_release_path(&path);
2752
2753         printf("update disk super on %s devid=%llu\n", other_dev, devid);
2754
2755         /* update other devices' super block */
2756         fp = open(other_dev, O_CREAT | O_RDWR, 0600);
2757         if (fp < 0) {
2758                 error("could not open %s: %s", other_dev, strerror(errno));
2759                 ret = -EIO;
2760                 goto out;
2761         }
2762
2763         memcpy(buf, info->super_copy, BTRFS_SUPER_INFO_SIZE);
2764
2765         disk_super = (struct btrfs_super_block *)buf;
2766         dev_item = &disk_super->dev_item;
2767
2768         btrfs_set_stack_device_type(dev_item, type);
2769         btrfs_set_stack_device_id(dev_item, devid);
2770         btrfs_set_stack_device_total_bytes(dev_item, total_bytes);
2771         btrfs_set_stack_device_bytes_used(dev_item, bytes_used);
2772         btrfs_set_stack_device_io_align(dev_item, io_align);
2773         btrfs_set_stack_device_io_width(dev_item, io_width);
2774         btrfs_set_stack_device_sector_size(dev_item, sector_size);
2775         memcpy(dev_item->uuid, dev_uuid, BTRFS_UUID_SIZE);
2776         memcpy(dev_item->fsid, fs_uuid, BTRFS_UUID_SIZE);
2777         csum_block((u8 *)buf, BTRFS_SUPER_INFO_SIZE);
2778
2779         ret = pwrite64(fp, buf, BTRFS_SUPER_INFO_SIZE, BTRFS_SUPER_INFO_OFFSET);
2780         if (ret != BTRFS_SUPER_INFO_SIZE) {
2781                 if (ret < 0)
2782                         error("cannot write superblock: %s", strerror(ret));
2783                 else
2784                         error("cannot write superblock");
2785                 ret = -EIO;
2786                 goto out;
2787         }
2788
2789         write_backup_supers(fp, (u8 *)buf);
2790
2791 out:
2792         if (fp != -1)
2793                 close(fp);
2794         return ret;
2795 }
2796
/* Print the btrfs-image usage text and exit with @ret. */
static void print_usage(int ret)
{
	static const char * const usage_lines[] = {
		"usage: btrfs-image [options] source target",
		"\t-r      \trestore metadump image",
		"\t-c value\tcompression level (0 ~ 9)",
		"\t-t value\tnumber of threads (1 ~ 32)",
		"\t-o      \tdon't mess with the chunk tree when restoring",
		"\t-s      \tsanitize file names, use once to just use garbage, use twice if you want crc collisions",
		"\t-w      \twalk all trees instead of using extent tree, do this if your extent tree is broken",
		"\t-m       \trestore for multiple devices",
		"",
		"\tIn the dump mode, source is the btrfs device and target is the output file (use '-' for stdout).",
		"\tIn the restore mode, source is the dumped image and target is the btrfs device/file.",
	};
	size_t i;

	for (i = 0; i < sizeof(usage_lines) / sizeof(usage_lines[0]); i++)
		printf("%s\n", usage_lines[i]);
	exit(ret);
}
2812
2813 int main(int argc, char *argv[])
2814 {
2815         char *source;
2816         char *target;
2817         u64 num_threads = 0;
2818         u64 compress_level = 0;
2819         int create = 1;
2820         int old_restore = 0;
2821         int walk_trees = 0;
2822         int multi_devices = 0;
2823         int ret;
2824         int sanitize = 0;
2825         int dev_cnt = 0;
2826         int usage_error = 0;
2827         FILE *out;
2828
2829         while (1) {
2830                 static const struct option long_options[] = {
2831                         { "help", no_argument, NULL, GETOPT_VAL_HELP},
2832                         { NULL, 0, NULL, 0 }
2833                 };
2834                 int c = getopt_long(argc, argv, "rc:t:oswm", long_options, NULL);
2835                 if (c < 0)
2836                         break;
2837                 switch (c) {
2838                 case 'r':
2839                         create = 0;
2840                         break;
2841                 case 't':
2842                         num_threads = arg_strtou64(optarg);
2843                         if (num_threads > MAX_WORKER_THREADS) {
2844                                 error("number of threads out of range: %llu > %d",
2845                                         (unsigned long long)num_threads,
2846                                         MAX_WORKER_THREADS);
2847                                 return 1;
2848                         }
2849                         break;
2850                 case 'c':
2851                         compress_level = arg_strtou64(optarg);
2852                         if (compress_level > 9) {
2853                                 error("compression level out of range: %llu",
2854                                         (unsigned long long)compress_level);
2855                                 return 1;
2856                         }
2857                         break;
2858                 case 'o':
2859                         old_restore = 1;
2860                         break;
2861                 case 's':
2862                         sanitize++;
2863                         break;
2864                 case 'w':
2865                         walk_trees = 1;
2866                         break;
2867                 case 'm':
2868                         create = 0;
2869                         multi_devices = 1;
2870                         break;
2871                         case GETOPT_VAL_HELP:
2872                 default:
2873                         print_usage(c != GETOPT_VAL_HELP);
2874                 }
2875         }
2876
2877         set_argv0(argv);
2878         if (check_argc_min(argc - optind, 2))
2879                 print_usage(1);
2880
2881         dev_cnt = argc - optind - 1;
2882
2883         if (create) {
2884                 if (old_restore) {
2885                         error(
2886                         "create and restore cannot be used at the same time");
2887                         usage_error++;
2888                 }
2889         } else {
2890                 if (walk_trees || sanitize || compress_level) {
2891                         error(
2892                         "useing -w, -s, -c options for restore makes no sense");
2893                         usage_error++;
2894                 }
2895                 if (multi_devices && dev_cnt < 2) {
2896                         error("not enough devices specified for -m option");
2897                         usage_error++;
2898                 }
2899                 if (!multi_devices && dev_cnt != 1) {
2900                         error("accepts only 1 device without -m option");
2901                         usage_error++;
2902                 }
2903         }
2904
2905         if (usage_error)
2906                 print_usage(1);
2907
2908         source = argv[optind];
2909         target = argv[optind + 1];
2910
2911         if (create && !strcmp(target, "-")) {
2912                 out = stdout;
2913         } else {
2914                 out = fopen(target, "w+");
2915                 if (!out) {
2916                         error("unable to create target file %s", target);
2917                         exit(1);
2918                 }
2919         }
2920
2921         if (compress_level > 0 || create == 0) {
2922                 if (num_threads == 0) {
2923                         long tmp = sysconf(_SC_NPROCESSORS_ONLN);
2924
2925                         if (tmp <= 0)
2926                                 tmp = 1;
2927                         num_threads = tmp;
2928                 }
2929         } else {
2930                 num_threads = 0;
2931         }
2932
2933         if (create) {
2934                 ret = check_mounted(source);
2935                 if (ret < 0) {
2936                         warning("unable to check mount status of: %s",
2937                                         strerror(-ret));
2938                 } else if (ret) {
2939                         warning("%s already mounted, results may be inaccurate",
2940                                         source);
2941                 }
2942
2943                 ret = create_metadump(source, out, num_threads,
2944                                       compress_level, sanitize, walk_trees);
2945         } else {
2946                 ret = restore_metadump(source, out, old_restore, num_threads,
2947                                        0, target, multi_devices);
2948         }
2949         if (ret) {
2950                 error("%s failed: %s", (create) ? "create" : "restore",
2951                        strerror(errno));
2952                 goto out;
2953         }
2954
2955          /* extended support for multiple devices */
2956         if (!create && multi_devices) {
2957                 struct btrfs_fs_info *info;
2958                 u64 total_devs;
2959                 int i;
2960
2961                 info = open_ctree_fs_info(target, 0, 0, 0,
2962                                           OPEN_CTREE_PARTIAL |
2963                                           OPEN_CTREE_RESTORE);
2964                 if (!info) {
2965                         error("open ctree failed at %s", target);
2966                         return 1;
2967                 }
2968
2969                 total_devs = btrfs_super_num_devices(info->super_copy);
2970                 if (total_devs != dev_cnt) {
2971                         error("it needs %llu devices but has only %d",
2972                                 total_devs, dev_cnt);
2973                         close_ctree(info->chunk_root);
2974                         goto out;
2975                 }
2976
2977                 /* update super block on other disks */
2978                 for (i = 2; i <= dev_cnt; i++) {
2979                         ret = update_disk_super_on_device(info,
2980                                         argv[optind + i], (u64)i);
2981                         if (ret) {
2982                                 error("update disk superblock failed devid %d: %d",
2983                                         i, ret);
2984                                 close_ctree(info->chunk_root);
2985                                 exit(1);
2986                         }
2987                 }
2988
2989                 close_ctree(info->chunk_root);
2990
2991                 /* fix metadata block to map correct chunk */
2992                 ret = restore_metadump(source, out, 0, num_threads, 1,
2993                                        target, 1);
2994                 if (ret) {
2995                         error("unable to fixup metadump: %d", ret);
2996                         exit(1);
2997                 }
2998         }
2999 out:
3000         if (out == stdout) {
3001                 fflush(out);
3002         } else {
3003                 fclose(out);
3004                 if (ret && create) {
3005                         int unlink_ret;
3006
3007                         unlink_ret = unlink(target);
3008                         if (unlink_ret)
3009                                 error("unlink output file %s failed: %s",
3010                                                 target, strerror(errno));
3011                 }
3012         }
3013
3014         btrfs_close_all_devices();
3015
3016         return !!ret;
3017 }