btrfs-progs: image: pass sanitize mode and name tree separately to sanitize_name
[platform/upstream/btrfs-progs.git] / image / main.c
1 /*
2  * Copyright (C) 2008 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <pthread.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <fcntl.h>
25 #include <unistd.h>
26 #include <dirent.h>
27 #include <zlib.h>
28 #include <getopt.h>
29
30 #include "kerncompat.h"
31 #include "crc32c.h"
32 #include "ctree.h"
33 #include "disk-io.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "volumes.h"
37 #include "extent_io.h"
38 #include "help.h"
39 #include "image/metadump.h"
40 #include "image/sanitize.h"
41
/* Upper bound on worker threads per dump/restore context */
#define MAX_WORKER_THREADS      (32)
43
/*
 * One unit of dump output: a [start, start + size) range of metadata,
 * optionally compressed by a worker thread before being written out in
 * on-disk order.
 */
struct async_work {
	struct list_head list;		/* link in metadump_struct::list (work queue) */
	struct list_head ordered;	/* link in metadump_struct::ordered (output order) */
	u64 start;			/* logical start of the dumped range */
	u64 size;			/* uncompressed byte count of the range */
	u8 *buffer;			/* raw data; replaced by zlib output when compressing */
	size_t bufsize;			/* current length of @buffer (compressed size once zlib ran) */
	int error;			/* set non-zero when compression failed */
};
53
/*
 * State for creating a metadata dump image: the main thread batches
 * metadata extents into clusters while a pool of workers optionally
 * compresses each item's buffer.
 */
struct metadump_struct {
	struct btrfs_root *root;	/* filesystem being dumped */
	FILE *out;			/* destination image stream */

	/* current cluster header + item index, padded to one block */
	union {
		struct meta_cluster cluster;
		char meta_cluster_bytes[BLOCK_SIZE];
	};

	pthread_t threads[MAX_WORKER_THREADS];
	size_t num_threads;
	pthread_mutex_t mutex;		/* protects list, num_*, done and error */
	pthread_cond_t cond;		/* signals workers that work (or shutdown) is pending */
	struct rb_root name_tree;	/* cache of sanitized names, see find_collision() */

	struct list_head list;		/* async_work items waiting for a worker */
	struct list_head ordered;	/* all async_work items in output order */
	size_t num_items;		/* items queued for the current cluster */
	size_t num_ready;		/* items the workers have finished */

	u64 pending_start;		/* start of range being batched, (u64)-1 if none */
	u64 pending_size;

	int compress_level;		/* zlib level; > 0 enables compression */
	int done;			/* tell workers to exit once the queue drains */
	int data;
	enum sanitize_mode sanitize_names;	/* whether/how file names are anonymized */

	int error;			/* first failure recorded by a worker (negative errno) */
};
84
/*
 * State for restoring a metadata dump image back onto a device.
 */
struct mdrestore_struct {
	FILE *in;			/* image being read */
	FILE *out;			/* restore target */

	pthread_t threads[MAX_WORKER_THREADS];
	size_t num_threads;
	pthread_mutex_t mutex;
	pthread_cond_t cond;

	struct rb_root chunk_tree;	/* fs_chunk entries keyed by logical address */
	struct rb_root physical_tree;	/* fs_chunk entries keyed by physical address */
	struct list_head list;
	struct list_head overlapping_chunks;
	size_t num_items;
	u32 nodesize;
	u64 devid;
	u64 alloced_chunks;
	u64 last_physical_offset;
	u8 uuid[BTRFS_UUID_SIZE];
	u8 fsid[BTRFS_FSID_SIZE];

	int compress_method;		/* COMPRESS_* value from the image header */
	int done;
	int error;
	int old_restore;		/* NOTE(review): presumably selects the legacy image format path — confirm */
	int fixup_offset;
	int multi_devices;
	int clear_space_cache;
	struct btrfs_fs_info *info;
};
115
/* Forward declarations for helpers defined later in this file */
static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
				   u64 search, u64 cluster_bytenr);
static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size);
119
/*
 * Recompute a metadata block's checksum in place.
 *
 * CRC32C is computed over everything past the BTRFS_CSUM_SIZE-wide
 * checksum area at the start of @buf, and only the 4-byte
 * (BTRFS_CRC32_SIZE) result is stored at the front of that area.
 */
static void csum_block(u8 *buf, size_t len)
{
	u8 result[BTRFS_CRC32_SIZE];
	u32 crc = ~(u32)0;
	crc = crc32c(crc, buf + BTRFS_CSUM_SIZE, len - BTRFS_CSUM_SIZE);
	btrfs_csum_final(crc, result);
	memcpy(buf, result, BTRFS_CRC32_SIZE);
}
128
129 static int has_name(struct btrfs_key *key)
130 {
131         switch (key->type) {
132         case BTRFS_DIR_ITEM_KEY:
133         case BTRFS_DIR_INDEX_KEY:
134         case BTRFS_INODE_REF_KEY:
135         case BTRFS_INODE_EXTREF_KEY:
136         case BTRFS_XATTR_ITEM_KEY:
137                 return 1;
138         default:
139                 break;
140         }
141
142         return 0;
143 }
144
145 static char *generate_garbage(u32 name_len)
146 {
147         char *buf = malloc(name_len);
148         int i;
149
150         if (!buf)
151                 return NULL;
152
153         for (i = 0; i < name_len; i++) {
154                 char c = rand_range(94) + 33;
155
156                 if (c == '/')
157                         c++;
158                 buf[i] = c;
159         }
160
161         return buf;
162 }
163
164 static int name_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
165 {
166         struct name *entry = rb_entry(a, struct name, n);
167         struct name *ins = rb_entry(b, struct name, n);
168         u32 len;
169
170         len = min(ins->len, entry->len);
171         return memcmp(ins->val, entry->val, len);
172 }
173
174 static int chunk_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
175 {
176         struct fs_chunk *entry = rb_entry(a, struct fs_chunk, l);
177         struct fs_chunk *ins = rb_entry(b, struct fs_chunk, l);
178
179         if (fuzz && ins->logical >= entry->logical &&
180             ins->logical < entry->logical + entry->bytes)
181                 return 0;
182
183         if (ins->logical < entry->logical)
184                 return -1;
185         else if (ins->logical > entry->logical)
186                 return 1;
187         return 0;
188 }
189
190 static int physical_cmp(struct rb_node *a, struct rb_node *b, int fuzz)
191 {
192         struct fs_chunk *entry = rb_entry(a, struct fs_chunk, p);
193         struct fs_chunk *ins = rb_entry(b, struct fs_chunk, p);
194
195         if (fuzz && ins->physical >= entry->physical &&
196             ins->physical < entry->physical + entry->bytes)
197                 return 0;
198
199         if (fuzz && entry->physical >= ins->physical &&
200             entry->physical < ins->physical + ins->bytes)
201                 return 0;
202
203         if (ins->physical < entry->physical)
204                 return -1;
205         else if (ins->physical > entry->physical)
206                 return 1;
207         return 0;
208 }
209
210 static void tree_insert(struct rb_root *root, struct rb_node *ins,
211                         int (*cmp)(struct rb_node *a, struct rb_node *b,
212                                    int fuzz))
213 {
214         struct rb_node ** p = &root->rb_node;
215         struct rb_node * parent = NULL;
216         int dir;
217
218         while(*p) {
219                 parent = *p;
220
221                 dir = cmp(*p, ins, 1);
222                 if (dir < 0)
223                         p = &(*p)->rb_left;
224                 else if (dir > 0)
225                         p = &(*p)->rb_right;
226                 else
227                         BUG();
228         }
229
230         rb_link_node(ins, parent, p);
231         rb_insert_color(ins, root);
232 }
233
234 static struct rb_node *tree_search(struct rb_root *root,
235                                    struct rb_node *search,
236                                    int (*cmp)(struct rb_node *a,
237                                               struct rb_node *b, int fuzz),
238                                    int fuzz)
239 {
240         struct rb_node *n = root->rb_node;
241         int dir;
242
243         while (n) {
244                 dir = cmp(n, search, fuzz);
245                 if (dir < 0)
246                         n = n->rb_left;
247                 else if (dir > 0)
248                         n = n->rb_right;
249                 else
250                         return n;
251         }
252
253         return NULL;
254 }
255
/*
 * Translate a logical address to the physical offset it was stored at.
 *
 * @mdres:        restore context holding the chunk tree
 * @logical:      logical address to translate
 * @size:         in/out; clamped so the range does not run past the end
 *                of the containing chunk
 * @physical_dup: if non-NULL, receives the offset in the second copy of
 *                a DUP chunk, or 0 when the chunk has no second copy
 *
 * Returns the physical offset, or @logical unchanged for the superblock
 * or when no containing chunk is known (the latter with a warning,
 * unless the image is being read from stdin).
 */
static u64 logical_to_physical(struct mdrestore_struct *mdres, u64 logical,
			       u64 *size, u64 *physical_dup)
{
	struct fs_chunk *fs_chunk;
	struct rb_node *entry;
	struct fs_chunk search;
	u64 offset;

	/* the superblock location is never remapped */
	if (logical == BTRFS_SUPER_INFO_OFFSET)
		return logical;

	search.logical = logical;
	/* fuzzy search: any chunk whose range contains @logical matches */
	entry = tree_search(&mdres->chunk_tree, &search.l, chunk_cmp, 1);
	if (!entry) {
		if (mdres->in != stdin)
			warning("cannot find a chunk, using logical");
		return logical;
	}
	fs_chunk = rb_entry(entry, struct fs_chunk, l);
	if (fs_chunk->logical > logical || fs_chunk->logical + fs_chunk->bytes < logical)
		BUG();
	offset = search.logical - fs_chunk->logical;

	if (physical_dup) {
		/* Only in dup case, physical_dup is not equal to 0 */
		if (fs_chunk->physical_dup)
			*physical_dup = fs_chunk->physical_dup + offset;
		else
			*physical_dup = 0;
	}

	/* clamp so the caller does not read/write across the chunk end */
	*size = min(*size, fs_chunk->bytes + fs_chunk->logical - logical);
	return fs_chunk->physical + offset;
}
290
/*
 * Reverse CRC-32C table.
 *
 * Indexed by the top byte of a desired checksum, this table lets
 * find_collision_calc_suffix() step the CRC-32C state machine backwards
 * one byte at a time, so a 4-byte suffix forcing a chosen checksum can
 * be computed directly instead of searched for.
 */
static const u32 crc32c_rev_table[256] = {
	0x00000000L,0x05EC76F1L,0x0BD8EDE2L,0x0E349B13L,
	0x17B1DBC4L,0x125DAD35L,0x1C693626L,0x198540D7L,
	0x2F63B788L,0x2A8FC179L,0x24BB5A6AL,0x21572C9BL,
	0x38D26C4CL,0x3D3E1ABDL,0x330A81AEL,0x36E6F75FL,
	0x5EC76F10L,0x5B2B19E1L,0x551F82F2L,0x50F3F403L,
	0x4976B4D4L,0x4C9AC225L,0x42AE5936L,0x47422FC7L,
	0x71A4D898L,0x7448AE69L,0x7A7C357AL,0x7F90438BL,
	0x6615035CL,0x63F975ADL,0x6DCDEEBEL,0x6821984FL,
	0xBD8EDE20L,0xB862A8D1L,0xB65633C2L,0xB3BA4533L,
	0xAA3F05E4L,0xAFD37315L,0xA1E7E806L,0xA40B9EF7L,
	0x92ED69A8L,0x97011F59L,0x9935844AL,0x9CD9F2BBL,
	0x855CB26CL,0x80B0C49DL,0x8E845F8EL,0x8B68297FL,
	0xE349B130L,0xE6A5C7C1L,0xE8915CD2L,0xED7D2A23L,
	0xF4F86AF4L,0xF1141C05L,0xFF208716L,0xFACCF1E7L,
	0xCC2A06B8L,0xC9C67049L,0xC7F2EB5AL,0xC21E9DABL,
	0xDB9BDD7CL,0xDE77AB8DL,0xD043309EL,0xD5AF466FL,
	0x7EF1CAB1L,0x7B1DBC40L,0x75292753L,0x70C551A2L,
	0x69401175L,0x6CAC6784L,0x6298FC97L,0x67748A66L,
	0x51927D39L,0x547E0BC8L,0x5A4A90DBL,0x5FA6E62AL,
	0x4623A6FDL,0x43CFD00CL,0x4DFB4B1FL,0x48173DEEL,
	0x2036A5A1L,0x25DAD350L,0x2BEE4843L,0x2E023EB2L,
	0x37877E65L,0x326B0894L,0x3C5F9387L,0x39B3E576L,
	0x0F551229L,0x0AB964D8L,0x048DFFCBL,0x0161893AL,
	0x18E4C9EDL,0x1D08BF1CL,0x133C240FL,0x16D052FEL,
	0xC37F1491L,0xC6936260L,0xC8A7F973L,0xCD4B8F82L,
	0xD4CECF55L,0xD122B9A4L,0xDF1622B7L,0xDAFA5446L,
	0xEC1CA319L,0xE9F0D5E8L,0xE7C44EFBL,0xE228380AL,
	0xFBAD78DDL,0xFE410E2CL,0xF075953FL,0xF599E3CEL,
	0x9DB87B81L,0x98540D70L,0x96609663L,0x938CE092L,
	0x8A09A045L,0x8FE5D6B4L,0x81D14DA7L,0x843D3B56L,
	0xB2DBCC09L,0xB737BAF8L,0xB90321EBL,0xBCEF571AL,
	0xA56A17CDL,0xA086613CL,0xAEB2FA2FL,0xAB5E8CDEL,
	0xFDE39562L,0xF80FE393L,0xF63B7880L,0xF3D70E71L,
	0xEA524EA6L,0xEFBE3857L,0xE18AA344L,0xE466D5B5L,
	0xD28022EAL,0xD76C541BL,0xD958CF08L,0xDCB4B9F9L,
	0xC531F92EL,0xC0DD8FDFL,0xCEE914CCL,0xCB05623DL,
	0xA324FA72L,0xA6C88C83L,0xA8FC1790L,0xAD106161L,
	0xB49521B6L,0xB1795747L,0xBF4DCC54L,0xBAA1BAA5L,
	0x8C474DFAL,0x89AB3B0BL,0x879FA018L,0x8273D6E9L,
	0x9BF6963EL,0x9E1AE0CFL,0x902E7BDCL,0x95C20D2DL,
	0x406D4B42L,0x45813DB3L,0x4BB5A6A0L,0x4E59D051L,
	0x57DC9086L,0x5230E677L,0x5C047D64L,0x59E80B95L,
	0x6F0EFCCAL,0x6AE28A3BL,0x64D61128L,0x613A67D9L,
	0x78BF270EL,0x7D5351FFL,0x7367CAECL,0x768BBC1DL,
	0x1EAA2452L,0x1B4652A3L,0x1572C9B0L,0x109EBF41L,
	0x091BFF96L,0x0CF78967L,0x02C31274L,0x072F6485L,
	0x31C993DAL,0x3425E52BL,0x3A117E38L,0x3FFD08C9L,
	0x2678481EL,0x23943EEFL,0x2DA0A5FCL,0x284CD30DL,
	0x83125FD3L,0x86FE2922L,0x88CAB231L,0x8D26C4C0L,
	0x94A38417L,0x914FF2E6L,0x9F7B69F5L,0x9A971F04L,
	0xAC71E85BL,0xA99D9EAAL,0xA7A905B9L,0xA2457348L,
	0xBBC0339FL,0xBE2C456EL,0xB018DE7DL,0xB5F4A88CL,
	0xDDD530C3L,0xD8394632L,0xD60DDD21L,0xD3E1ABD0L,
	0xCA64EB07L,0xCF889DF6L,0xC1BC06E5L,0xC4507014L,
	0xF2B6874BL,0xF75AF1BAL,0xF96E6AA9L,0xFC821C58L,
	0xE5075C8FL,0xE0EB2A7EL,0xEEDFB16DL,0xEB33C79CL,
	0x3E9C81F3L,0x3B70F702L,0x35446C11L,0x30A81AE0L,
	0x292D5A37L,0x2CC12CC6L,0x22F5B7D5L,0x2719C124L,
	0x11FF367BL,0x1413408AL,0x1A27DB99L,0x1FCBAD68L,
	0x064EEDBFL,0x03A29B4EL,0x0D96005DL,0x087A76ACL,
	0x605BEEE3L,0x65B79812L,0x6B830301L,0x6E6F75F0L,
	0x77EA3527L,0x720643D6L,0x7C32D8C5L,0x79DEAE34L,
	0x4F38596BL,0x4AD42F9AL,0x44E0B489L,0x410CC278L,
	0x588982AFL,0x5D65F45EL,0x53516F4DL,0x56BD19BCL
};
360
/*
 * Calculate a 4-byte suffix to match desired CRC32C
 *
 * @current_crc: CRC32C checksum of all bytes before the suffix
 * @desired_crc: the checksum that we want to get after adding the suffix
 *
 * Outputs: @suffix: pointer to where the suffix will be written (4-bytes)
 */
static void find_collision_calc_suffix(unsigned long current_crc,
				       unsigned long desired_crc,
				       char *suffix)
{
	int i;

	/*
	 * Run the CRC backwards over 4 bytes via the reverse table,
	 * folding in the corresponding byte of @current_crc at each
	 * step; what remains is the suffix expressed as a CRC state.
	 */
	for(i = 3; i >= 0; i--) {
		desired_crc = (desired_crc << 8)
			    ^ crc32c_rev_table[desired_crc >> 24 & 0xFF]
			    ^ ((current_crc >> i * 8) & 0xFF);
	}
	/* emit the resulting state as 4 little-endian-ordered bytes */
	for (i = 0; i < 4; i++)
		suffix[i] = (desired_crc >> i * 8) & 0xFF;
}
383
/*
 * Report whether a computed 4-byte suffix obeys the file name rules:
 * every byte must be a printable ASCII character (' ' through '~') and
 * must not be the path separator '/'.
 */
static int find_collision_is_suffix_valid(const char *suffix)
{
	int i;

	for (i = 0; i < 4; i++) {
		const char c = suffix[i];

		if (c == '/' || c < ' ' || c > 126)
			return 0;
	}
	return 1;
}
399
/*
 * Search for a same-length string whose CRC32C equals that of val->val,
 * writing the candidate into val->sub.
 *
 * The prefix (all but the last 4 bytes) is enumerated odometer-style
 * over the characters [' ', '~'] with '/' skipped; for every prefix the
 * matching 4-byte suffix is derived directly via
 * find_collision_calc_suffix(). A candidate is accepted when the suffix
 * is itself valid and the whole string differs from the original name.
 *
 * Returns 1 when a collision was found in val->sub, 0 otherwise
 * (including all names of 4 bytes or less).
 */
static int find_collision_reverse_crc32c(struct name *val, u32 name_len)
{
	unsigned long checksum;
	unsigned long current_checksum;
	int found = 0;
	int i;

	/* There are no same length collisions of 4 or less bytes */
	if (name_len <= 4)
		return 0;
	checksum = crc32c(~1, val->val, name_len);
	name_len -= 4;
	/* start the prefix odometer at all-spaces */
	memset(val->sub, ' ', name_len);
	i = 0;
	while (1) {
		current_checksum = crc32c(~1, val->sub, name_len);
		find_collision_calc_suffix(current_checksum,
					   checksum,
					   val->sub + name_len);
		if (find_collision_is_suffix_valid(val->sub + name_len) &&
		    memcmp(val->sub, val->val, val->len)) {
			found = 1;
			break;
		}

		if (val->sub[i] == 126) {
			/* digit overflow: carry into the next position */
			do {
				i++;
				if (i >= name_len)
					break;
			} while (val->sub[i] == 126);

			/* all positions exhausted: give up */
			if (i >= name_len)
				break;
			val->sub[i]++;
			if (val->sub[i] == '/')
				val->sub[i]++;
			/* reset all lower positions and restart there */
			memset(val->sub, ' ', i);
			i = 0;
			continue;
		} else {
			val->sub[i]++;
			if (val->sub[i] == '/')
				val->sub[i]++;
		}
	}
	return found;
}
448
/*
 * Return a sanitized substitute for @name, reusing the cached one when
 * the same name was already processed.
 *
 * Takes ownership of @name: it is either stored in the name tree or
 * freed (on cache hit and on every failure path). The returned buffer
 * is owned by the tree and must not be freed by the caller; NULL is
 * returned on allocation failure.
 *
 * When no CRC32C collision can be found the substitute is random
 * printable garbage, so directory-index hash lookups on the restored
 * image will not match (hence the warning).
 */
static char *find_collision(struct rb_root *name_tree, char *name,
			    u32 name_len)
{
	struct name *val;
	struct rb_node *entry;
	struct name tmp;
	int found;
	int i;

	/* cache hit: hand back the previously computed substitute */
	tmp.val = name;
	tmp.len = name_len;
	entry = tree_search(name_tree, &tmp.n, name_cmp, 0);
	if (entry) {
		val = rb_entry(entry, struct name, n);
		free(name);
		return val->sub;
	}

	val = malloc(sizeof(struct name));
	if (!val) {
		error("cannot sanitize name, not enough memory");
		free(name);
		return NULL;
	}

	memset(val, 0, sizeof(*val));

	val->val = name;
	val->len = name_len;
	val->sub = malloc(name_len);
	if (!val->sub) {
		error("cannot sanitize name, not enough memory");
		free(val);
		free(name);
		return NULL;
	}

	found = find_collision_reverse_crc32c(val, name_len);

	if (!found) {
		warning(
"cannot find a hash collision for '%.*s', generating garbage, it won't match indexes",
			val->len, val->val);
		/* fall back to random printable characters, no '/' */
		for (i = 0; i < name_len; i++) {
			char c = rand_range(94) + 33;

			if (c == '/')
				c++;
			val->sub[i] = c;
		}
	}

	tree_insert(name_tree, &val->n, name_cmp);
	return val->sub;
}
504
/*
 * Overwrite every name stored in the dir item(s) at @slot with a
 * same-length sanitized replacement.
 *
 * In SANITIZE_COLLISIONS mode the replacement is a crc32c-colliding
 * name cached in @name_tree (owned by the tree, not freed here); in
 * SANITIZE_NAMES mode it is freshly generated garbage that must be
 * freed after being written into the buffer.
 */
static void sanitize_dir_item(enum sanitize_mode sanitize,
		struct rb_root *name_tree, struct extent_buffer *eb, int slot)
{
	struct btrfs_dir_item *dir_item;
	char *buf;
	char *garbage;
	unsigned long name_ptr;
	u32 total_len;
	u32 cur = 0;
	u32 this_len;
	u32 name_len;
	int free_garbage = (sanitize == SANITIZE_NAMES);

	dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
	total_len = btrfs_item_size_nr(eb, slot);
	/* a single item may pack several dir entries back to back */
	while (cur < total_len) {
		this_len = sizeof(*dir_item) +
			btrfs_dir_name_len(eb, dir_item) +
			btrfs_dir_data_len(eb, dir_item);
		name_ptr = (unsigned long)(dir_item + 1);
		name_len = btrfs_dir_name_len(eb, dir_item);

		if (sanitize == SANITIZE_COLLISIONS) {
			buf = malloc(name_len);
			if (!buf) {
				error("cannot sanitize name, not enough memory");
				return;
			}
			read_extent_buffer(eb, buf, name_ptr, name_len);
			/* find_collision takes ownership of buf */
			garbage = find_collision(name_tree, buf, name_len);
		} else {
			garbage = generate_garbage(name_len);
		}
		if (!garbage) {
			error("cannot sanitize name, not enough memory");
			return;
		}
		write_extent_buffer(eb, garbage, name_ptr, name_len);
		cur += this_len;
		dir_item = (struct btrfs_dir_item *)((char *)dir_item +
						     this_len);
		if (free_garbage)
			free(garbage);
	}
}
550
/*
 * Overwrite every name stored in the inode ref(s) at @slot with a
 * same-length sanitized replacement.
 *
 * @ext selects the extended ref layout (struct btrfs_inode_extref)
 * instead of the plain one. Replacement ownership follows the same
 * rule as sanitize_dir_item(): collision names are owned by
 * @name_tree, generated garbage is freed here.
 */
static void sanitize_inode_ref(enum sanitize_mode sanitize,
		struct rb_root *name_tree, struct extent_buffer *eb, int slot,
		int ext)
{
	struct btrfs_inode_extref *extref;
	struct btrfs_inode_ref *ref;
	char *garbage, *buf;
	unsigned long ptr;
	unsigned long name_ptr;
	u32 item_size;
	u32 cur_offset = 0;
	int len;
	int free_garbage = (sanitize == SANITIZE_NAMES);

	item_size = btrfs_item_size_nr(eb, slot);
	ptr = btrfs_item_ptr_offset(eb, slot);
	/* one item may hold several refs back to back */
	while (cur_offset < item_size) {
		if (ext) {
			extref = (struct btrfs_inode_extref *)(ptr +
							       cur_offset);
			name_ptr = (unsigned long)(&extref->name);
			len = btrfs_inode_extref_name_len(eb, extref);
			cur_offset += sizeof(*extref);
		} else {
			ref = (struct btrfs_inode_ref *)(ptr + cur_offset);
			len = btrfs_inode_ref_name_len(eb, ref);
			name_ptr = (unsigned long)(ref + 1);
			cur_offset += sizeof(*ref);
		}
		cur_offset += len;

		if (sanitize == SANITIZE_COLLISIONS) {
			buf = malloc(len);
			if (!buf) {
				error("cannot sanitize name, not enough memory");
				return;
			}
			read_extent_buffer(eb, buf, name_ptr, len);
			/* find_collision takes ownership of buf */
			garbage = find_collision(name_tree, buf, len);
		} else {
			garbage = generate_garbage(len);
		}

		if (!garbage) {
			error("cannot sanitize name, not enough memory");
			return;
		}
		write_extent_buffer(eb, garbage, name_ptr, len);
		if (free_garbage)
			free(garbage);
	}
}
603
604 static void sanitize_xattr(struct extent_buffer *eb, int slot)
605 {
606         struct btrfs_dir_item *dir_item;
607         unsigned long data_ptr;
608         u32 data_len;
609
610         dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
611         data_len = btrfs_dir_data_len(eb, dir_item);
612
613         data_ptr = (unsigned long)((char *)(dir_item + 1) +
614                                    btrfs_dir_name_len(eb, dir_item));
615         memset_extent_buffer(eb, 0, data_ptr, data_len);
616 }
617
/*
 * Sanitize all names in the item at @slot of a block being dumped.
 *
 * @src is copied into a dummy extent buffer first, because the
 * sanitizing helpers work through extent-buffer accessors; the
 * sanitized bytes are then copied into @dst (the output copy of the
 * block), leaving @src itself untouched.
 */
static void sanitize_name(enum sanitize_mode sanitize, struct rb_root *name_tree,
		u8 *dst, struct extent_buffer *src, struct btrfs_key *key,
		int slot)
{
	struct extent_buffer *eb;

	eb = alloc_dummy_eb(src->start, src->len);
	if (!eb) {
		error("cannot sanitize name, not enough memory");
		return;
	}

	memcpy(eb->data, src->data, src->len);

	/* dispatch on the key type; see has_name() for the full list */
	switch (key->type) {
	case BTRFS_DIR_ITEM_KEY:
	case BTRFS_DIR_INDEX_KEY:
		sanitize_dir_item(sanitize, name_tree, eb, slot);
		break;
	case BTRFS_INODE_REF_KEY:
		sanitize_inode_ref(sanitize, name_tree, eb, slot, 0);
		break;
	case BTRFS_INODE_EXTREF_KEY:
		sanitize_inode_ref(sanitize, name_tree, eb, slot, 1);
		break;
	case BTRFS_XATTR_ITEM_KEY:
		sanitize_xattr(eb, slot);
		break;
	default:
		break;
	}

	memcpy(dst, eb->data, eb->len);
	free(eb);
}
653
/*
 * zero inline extents and csum items
 *
 * Walks the items of leaf @src and blanks, in the output copy @dst,
 * anything that would leak user data into the image: checksum items,
 * inline file extent payloads, and (when sanitizing is enabled) items
 * carrying file names.
 */
static void zero_items(struct metadump_struct *md, u8 *dst,
		       struct extent_buffer *src)
{
	struct btrfs_file_extent_item *fi;
	struct btrfs_item *item;
	struct btrfs_key key;
	u32 nritems = btrfs_header_nritems(src);
	size_t size;
	unsigned long ptr;
	int i, extent_type;

	for (i = 0; i < nritems; i++) {
		item = btrfs_item_nr(i);
		btrfs_item_key_to_cpu(src, &key, i);
		if (key.type == BTRFS_CSUM_ITEM_KEY) {
			/* wipe the whole checksum payload */
			size = btrfs_item_size_nr(src, i);
			memset(dst + btrfs_leaf_data(src) +
			       btrfs_item_offset_nr(src, i), 0, size);
			continue;
		}

		if (md->sanitize_names && has_name(&key)) {
			sanitize_name(md->sanitize_names, &md->name_tree, dst,
					src, &key, i);
			continue;
		}

		if (key.type != BTRFS_EXTENT_DATA_KEY)
			continue;

		/* only inline extents embed file data in the leaf */
		fi = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(src, fi);
		if (extent_type != BTRFS_FILE_EXTENT_INLINE)
			continue;

		ptr = btrfs_file_extent_inline_start(fi);
		size = btrfs_file_extent_inline_item_len(src, item);
		memset(dst + ptr, 0, size);
	}
}
697
/*
 * copy buffer and zero useless data in the buffer
 *
 * Copies metadata block @src into @dst, zeroes the regions that carry
 * no tree data (everything after the header in an empty block, the gap
 * between item headers and item data in a leaf, the slack after the
 * key pointers in a node), sanitizes leaf contents via zero_items(),
 * and recomputes the block checksum. The superblock is copied verbatim.
 */
static void copy_buffer(struct metadump_struct *md, u8 *dst,
			struct extent_buffer *src)
{
	int level;
	size_t size;
	u32 nritems;

	memcpy(dst, src->data, src->len);
	if (src->start == BTRFS_SUPER_INFO_OFFSET)
		return;

	level = btrfs_header_level(src);
	nritems = btrfs_header_nritems(src);

	if (nritems == 0) {
		size = sizeof(struct btrfs_header);
		memset(dst + size, 0, src->len - size);
	} else if (level == 0) {
		/* zero the hole between the item headers and item data */
		size = btrfs_leaf_data(src) +
			btrfs_item_offset_nr(src, nritems - 1) -
			btrfs_item_nr_offset(nritems);
		memset(dst + btrfs_item_nr_offset(nritems), 0, size);
		zero_items(md, dst, src);
	} else {
		/* internal node: zero everything after the key pointers */
		size = offsetof(struct btrfs_node, ptrs) +
			sizeof(struct btrfs_key_ptr) * nritems;
		memset(dst + size, 0, src->len - size);
	}
	csum_block(dst, src->len);
}
731
/*
 * Worker thread body: pull async_work items off md->list and, when
 * compression is enabled, replace each item's buffer with its
 * zlib-compressed form.
 *
 * Exits when md->done is set and the queue is empty. On allocation
 * failure the thread records -ENOMEM in md->error and exits; a zlib
 * failure only marks the individual item's error flag.
 */
static void *dump_worker(void *data)
{
	struct metadump_struct *md = (struct metadump_struct *)data;
	struct async_work *async;
	int ret;

	while (1) {
		/* wait for work or the shutdown signal */
		pthread_mutex_lock(&md->mutex);
		while (list_empty(&md->list)) {
			if (md->done) {
				pthread_mutex_unlock(&md->mutex);
				goto out;
			}
			pthread_cond_wait(&md->cond, &md->mutex);
		}
		async = list_entry(md->list.next, struct async_work, list);
		list_del_init(&async->list);
		pthread_mutex_unlock(&md->mutex);

		if (md->compress_level > 0) {
			u8 *orig = async->buffer;

			/* compress into a fresh worst-case-sized buffer */
			async->bufsize = compressBound(async->size);
			async->buffer = malloc(async->bufsize);
			if (!async->buffer) {
				error("not enough memory for async buffer");
				pthread_mutex_lock(&md->mutex);
				if (!md->error)
					md->error = -ENOMEM;
				pthread_mutex_unlock(&md->mutex);
				pthread_exit(NULL);
			}

			ret = compress2(async->buffer,
					 (unsigned long *)&async->bufsize,
					 orig, async->size, md->compress_level);

			if (ret != Z_OK)
				async->error = 1;

			free(orig);
		}

		pthread_mutex_lock(&md->mutex);
		md->num_ready++;
		pthread_mutex_unlock(&md->mutex);
	}
out:
	pthread_exit(NULL);
}
782
783 static void meta_cluster_init(struct metadump_struct *md, u64 start)
784 {
785         struct meta_cluster_header *header;
786
787         md->num_items = 0;
788         md->num_ready = 0;
789         header = &md->cluster.header;
790         header->magic = cpu_to_le64(HEADER_MAGIC);
791         header->bytenr = cpu_to_le64(start);
792         header->nritems = cpu_to_le32(0);
793         header->compress = md->compress_level > 0 ?
794                            COMPRESS_ZLIB : COMPRESS_NONE;
795 }
796
/*
 * Tear down a metadump context: wake every worker, wait for all of
 * them to exit, destroy the synchronization primitives and free the
 * cached sanitized names.
 *
 * @num_threads: number of threads that were actually started (may be
 * fewer than md->num_threads when metadump_init() failed part way).
 */
static void metadump_destroy(struct metadump_struct *md, int num_threads)
{
	int i;
	struct rb_node *n;

	pthread_mutex_lock(&md->mutex);
	md->done = 1;
	pthread_cond_broadcast(&md->cond);
	pthread_mutex_unlock(&md->mutex);

	for (i = 0; i < num_threads; i++)
		pthread_join(md->threads[i], NULL);

	pthread_cond_destroy(&md->cond);
	pthread_mutex_destroy(&md->mutex);

	/* drain the name cache, freeing both originals and substitutes */
	while ((n = rb_first(&md->name_tree))) {
		struct name *name;

		name = rb_entry(n, struct name, n);
		rb_erase(n, &md->name_tree);
		free(name->val);
		free(name->sub);
		free(name);
	}
}
823
824 static int metadump_init(struct metadump_struct *md, struct btrfs_root *root,
825                          FILE *out, int num_threads, int compress_level,
826                          enum sanitize_mode sanitize_names)
827 {
828         int i, ret = 0;
829
830         memset(md, 0, sizeof(*md));
831         INIT_LIST_HEAD(&md->list);
832         INIT_LIST_HEAD(&md->ordered);
833         md->root = root;
834         md->out = out;
835         md->pending_start = (u64)-1;
836         md->compress_level = compress_level;
837         md->sanitize_names = sanitize_names;
838         if (sanitize_names == SANITIZE_COLLISIONS)
839                 crc32c_optimization_init();
840
841         md->name_tree.rb_node = NULL;
842         md->num_threads = num_threads;
843         pthread_cond_init(&md->cond, NULL);
844         pthread_mutex_init(&md->mutex, NULL);
845         meta_cluster_init(md, 0);
846
847         if (!num_threads)
848                 return 0;
849
850         for (i = 0; i < num_threads; i++) {
851                 ret = pthread_create(md->threads + i, NULL, dump_worker, md);
852                 if (ret)
853                         break;
854         }
855
856         if (ret)
857                 metadump_destroy(md, i + 1);
858
859         return ret;
860 }
861
862 static int write_zero(FILE *out, size_t size)
863 {
864         static char zero[BLOCK_SIZE];
865         return fwrite(zero, size, 1, out);
866 }
867
868 static int write_buffers(struct metadump_struct *md, u64 *next)
869 {
870         struct meta_cluster_header *header = &md->cluster.header;
871         struct meta_cluster_item *item;
872         struct async_work *async;
873         u64 bytenr = 0;
874         u32 nritems = 0;
875         int ret;
876         int err = 0;
877
878         if (list_empty(&md->ordered))
879                 goto out;
880
881         /* wait until all buffers are compressed */
882         while (!err && md->num_items > md->num_ready) {
883                 struct timespec ts = {
884                         .tv_sec = 0,
885                         .tv_nsec = 10000000,
886                 };
887                 pthread_mutex_unlock(&md->mutex);
888                 nanosleep(&ts, NULL);
889                 pthread_mutex_lock(&md->mutex);
890                 err = md->error;
891         }
892
893         if (err) {
894                 error("one of the threads failed: %s", strerror(-err));
895                 goto out;
896         }
897
898         /* setup and write index block */
899         list_for_each_entry(async, &md->ordered, ordered) {
900                 item = &md->cluster.items[nritems];
901                 item->bytenr = cpu_to_le64(async->start);
902                 item->size = cpu_to_le32(async->bufsize);
903                 nritems++;
904         }
905         header->nritems = cpu_to_le32(nritems);
906
907         ret = fwrite(&md->cluster, BLOCK_SIZE, 1, md->out);
908         if (ret != 1) {
909                 error("unable to write out cluster: %s", strerror(errno));
910                 return -errno;
911         }
912
913         /* write buffers */
914         bytenr += le64_to_cpu(header->bytenr) + BLOCK_SIZE;
915         while (!list_empty(&md->ordered)) {
916                 async = list_entry(md->ordered.next, struct async_work,
917                                    ordered);
918                 list_del_init(&async->ordered);
919
920                 bytenr += async->bufsize;
921                 if (!err)
922                         ret = fwrite(async->buffer, async->bufsize, 1,
923                                      md->out);
924                 if (ret != 1) {
925                         error("unable to write out cluster: %s",
926                                 strerror(errno));
927                         err = -errno;
928                         ret = 0;
929                 }
930
931                 free(async->buffer);
932                 free(async);
933         }
934
935         /* zero unused space in the last block */
936         if (!err && bytenr & BLOCK_MASK) {
937                 size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);
938
939                 bytenr += size;
940                 ret = write_zero(md->out, size);
941                 if (ret != 1) {
942                         error("unable to zero out buffer: %s",
943                                 strerror(errno));
944                         err = -errno;
945                 }
946         }
947 out:
948         *next = bytenr;
949         return err;
950 }
951
/*
 * Fill async->buffer with the data extent at logical address async->start,
 * length async->size, retrying the remaining range on each available
 * mirror until everything is read.
 *
 * Returns 0 on success, -EIO if no mirror could supply the missing bytes.
 *
 * NOTE(review): the loop passes cur_mirror values 0..num_copies-1 to
 * read_extent_data(); btrfs mirror numbers are conventionally 1-based
 * with 0 meaning "any" — confirm read_extent_data()'s expectation.
 */
static int read_data_extent(struct metadump_struct *md,
			    struct async_work *async)
{
	struct btrfs_root *root = md->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 bytes_left = async->size;
	u64 logical = async->start;
	u64 offset = 0;
	u64 read_len;
	int num_copies;
	int cur_mirror;
	int ret;

	num_copies = btrfs_num_copies(root->fs_info, logical, bytes_left);

	/* Try our best to read data, just like read_tree_block() */
	for (cur_mirror = 0; cur_mirror < num_copies; cur_mirror++) {
		while (bytes_left) {
			read_len = bytes_left;
			/* read_len is in/out: how much was actually read */
			ret = read_extent_data(fs_info,
					(char *)(async->buffer + offset),
					logical, &read_len, cur_mirror);
			if (ret < 0)
				break;
			/* advance past what this mirror provided; the next
			 * mirror resumes from here */
			offset += read_len;
			logical += read_len;
			bytes_left -= read_len;
		}
	}
	if (bytes_left)
		return -EIO;
	return 0;
}
985
986 static int get_dev_fd(struct btrfs_root *root)
987 {
988         struct btrfs_device *dev;
989
990         dev = list_first_entry(&root->fs_info->fs_devices->devices,
991                                struct btrfs_device, dev_list);
992         return dev->fd;
993 }
994
995 static int flush_pending(struct metadump_struct *md, int done)
996 {
997         struct async_work *async = NULL;
998         struct extent_buffer *eb;
999         u64 start = 0;
1000         u64 size;
1001         size_t offset;
1002         int ret = 0;
1003
1004         if (md->pending_size) {
1005                 async = calloc(1, sizeof(*async));
1006                 if (!async)
1007                         return -ENOMEM;
1008
1009                 async->start = md->pending_start;
1010                 async->size = md->pending_size;
1011                 async->bufsize = async->size;
1012                 async->buffer = malloc(async->bufsize);
1013                 if (!async->buffer) {
1014                         free(async);
1015                         return -ENOMEM;
1016                 }
1017                 offset = 0;
1018                 start = async->start;
1019                 size = async->size;
1020
1021                 if (md->data) {
1022                         ret = read_data_extent(md, async);
1023                         if (ret) {
1024                                 free(async->buffer);
1025                                 free(async);
1026                                 return ret;
1027                         }
1028                 }
1029
1030                 /*
1031                  * Balance can make the mapping not cover the super block, so
1032                  * just copy directly from one of the devices.
1033                  */
1034                 if (start == BTRFS_SUPER_INFO_OFFSET) {
1035                         int fd = get_dev_fd(md->root);
1036
1037                         ret = pread64(fd, async->buffer, size, start);
1038                         if (ret < size) {
1039                                 free(async->buffer);
1040                                 free(async);
1041                                 error("unable to read superblock at %llu: %s",
1042                                                 (unsigned long long)start,
1043                                                 strerror(errno));
1044                                 return -errno;
1045                         }
1046                         size = 0;
1047                         ret = 0;
1048                 }
1049
1050                 while (!md->data && size > 0) {
1051                         u64 this_read = min((u64)md->root->fs_info->nodesize,
1052                                         size);
1053
1054                         eb = read_tree_block(md->root->fs_info, start, 0);
1055                         if (!extent_buffer_uptodate(eb)) {
1056                                 free(async->buffer);
1057                                 free(async);
1058                                 error("unable to read metadata block %llu",
1059                                         (unsigned long long)start);
1060                                 return -EIO;
1061                         }
1062                         copy_buffer(md, async->buffer + offset, eb);
1063                         free_extent_buffer(eb);
1064                         start += this_read;
1065                         offset += this_read;
1066                         size -= this_read;
1067                 }
1068
1069                 md->pending_start = (u64)-1;
1070                 md->pending_size = 0;
1071         } else if (!done) {
1072                 return 0;
1073         }
1074
1075         pthread_mutex_lock(&md->mutex);
1076         if (async) {
1077                 list_add_tail(&async->ordered, &md->ordered);
1078                 md->num_items++;
1079                 if (md->compress_level > 0) {
1080                         list_add_tail(&async->list, &md->list);
1081                         pthread_cond_signal(&md->cond);
1082                 } else {
1083                         md->num_ready++;
1084                 }
1085         }
1086         if (md->num_items >= ITEMS_PER_CLUSTER || done) {
1087                 ret = write_buffers(md, &start);
1088                 if (ret)
1089                         error("unable to write buffers: %s", strerror(-ret));
1090                 else
1091                         meta_cluster_init(md, start);
1092         }
1093         pthread_mutex_unlock(&md->mutex);
1094         return ret;
1095 }
1096
1097 static int add_extent(u64 start, u64 size, struct metadump_struct *md,
1098                       int data)
1099 {
1100         int ret;
1101         if (md->data != data ||
1102             md->pending_size + size > MAX_PENDING_SIZE ||
1103             md->pending_start + md->pending_size != start) {
1104                 ret = flush_pending(md, 0);
1105                 if (ret)
1106                         return ret;
1107                 md->pending_start = start;
1108         }
1109         readahead_tree_block(md->root->fs_info, start, 0);
1110         md->pending_size += size;
1111         md->data = data;
1112         return 0;
1113 }
1114
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Legacy (extent tree v0) check: decide whether the extent at @bytenr is
 * a tree block by scanning the EXTENT_REF_V0 items that follow it in the
 * extent tree.  @path must be positioned at the extent item; it is
 * advanced across the ref items as a side effect.
 *
 * Returns 1 if a ref with a tree-owner objectid is found, 0 otherwise,
 * or a negative errno on btree iteration failure.
 */
static int is_tree_block(struct btrfs_root *extent_root,
			 struct btrfs_path *path, u64 bytenr)
{
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 ref_objectid;
	int ret;

	leaf = path->nodes[0];
	while (1) {
		struct btrfs_extent_ref_v0 *ref_item;
		path->slots[0]++;
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				return ret;
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		/* ran past the refs belonging to this extent */
		if (key.objectid != bytenr)
			break;
		if (key.type != BTRFS_EXTENT_REF_V0_KEY)
			continue;
		ref_item = btrfs_item_ptr(leaf, path->slots[0],
					  struct btrfs_extent_ref_v0);
		ref_objectid = btrfs_ref_objectid_v0(leaf, ref_item);
		/* objectids below FIRST_FREE are internal trees, not files */
		if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID)
			return 1;
		break;
	}
	return 0;
}
#endif
1151
1152 static int copy_tree_blocks(struct btrfs_root *root, struct extent_buffer *eb,
1153                             struct metadump_struct *metadump, int root_tree)
1154 {
1155         struct extent_buffer *tmp;
1156         struct btrfs_root_item *ri;
1157         struct btrfs_key key;
1158         struct btrfs_fs_info *fs_info = root->fs_info;
1159         u64 bytenr;
1160         int level;
1161         int nritems = 0;
1162         int i = 0;
1163         int ret;
1164
1165         ret = add_extent(btrfs_header_bytenr(eb), fs_info->nodesize,
1166                          metadump, 0);
1167         if (ret) {
1168                 error("unable to add metadata block %llu: %d",
1169                                 btrfs_header_bytenr(eb), ret);
1170                 return ret;
1171         }
1172
1173         if (btrfs_header_level(eb) == 0 && !root_tree)
1174                 return 0;
1175
1176         level = btrfs_header_level(eb);
1177         nritems = btrfs_header_nritems(eb);
1178         for (i = 0; i < nritems; i++) {
1179                 if (level == 0) {
1180                         btrfs_item_key_to_cpu(eb, &key, i);
1181                         if (key.type != BTRFS_ROOT_ITEM_KEY)
1182                                 continue;
1183                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
1184                         bytenr = btrfs_disk_root_bytenr(eb, ri);
1185                         tmp = read_tree_block(fs_info, bytenr, 0);
1186                         if (!extent_buffer_uptodate(tmp)) {
1187                                 error("unable to read log root block");
1188                                 return -EIO;
1189                         }
1190                         ret = copy_tree_blocks(root, tmp, metadump, 0);
1191                         free_extent_buffer(tmp);
1192                         if (ret)
1193                                 return ret;
1194                 } else {
1195                         bytenr = btrfs_node_blockptr(eb, i);
1196                         tmp = read_tree_block(fs_info, bytenr, 0);
1197                         if (!extent_buffer_uptodate(tmp)) {
1198                                 error("unable to read log root block");
1199                                 return -EIO;
1200                         }
1201                         ret = copy_tree_blocks(root, tmp, metadump, root_tree);
1202                         free_extent_buffer(tmp);
1203                         if (ret)
1204                                 return ret;
1205                 }
1206         }
1207
1208         return 0;
1209 }
1210
1211 static int copy_log_trees(struct btrfs_root *root,
1212                           struct metadump_struct *metadump)
1213 {
1214         u64 blocknr = btrfs_super_log_root(root->fs_info->super_copy);
1215
1216         if (blocknr == 0)
1217                 return 0;
1218
1219         if (!root->fs_info->log_root_tree ||
1220             !root->fs_info->log_root_tree->node) {
1221                 error("unable to copy tree log, it has not been setup");
1222                 return -EIO;
1223         }
1224
1225         return copy_tree_blocks(root, root->fs_info->log_root_tree->node,
1226                                 metadump, 1);
1227 }
1228
/*
 * Queue the data extents of the free-space-cache inodes (EXTENT_DATA
 * items in the tree root) so the cache contents are included in the dump.
 * @path must be an initialized, released path; it is reused here.
 *
 * Returns 0 on success or a negative errno.
 */
static int copy_space_cache(struct btrfs_root *root,
			    struct metadump_struct *metadump,
			    struct btrfs_path *path)
{
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 bytenr, num_bytes;
	int ret;

	/* space cache inodes live in the tree root */
	root = root->fs_info->tree_root;

	key.objectid = 0;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0) {
		error("free space inode not found: %d", ret);
		return ret;
	}

	leaf = path->nodes[0];

	while (1) {
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				error("cannot go to next leaf %d", ret);
				return ret;
			}
			/* ret > 0: no more leaves, we're done */
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.type != BTRFS_EXTENT_DATA_KEY) {
			path->slots[0]++;
			continue;
		}

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		/* only regular extents have on-disk data worth copying */
		if (btrfs_file_extent_type(leaf, fi) !=
		    BTRFS_FILE_EXTENT_REG) {
			path->slots[0]++;
			continue;
		}

		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
		/* queued as data (last argument 1): copied verbatim */
		ret = add_extent(bytenr, num_bytes, metadump, 1);
		if (ret) {
			error("unable to add space cache blocks %d", ret);
			btrfs_release_path(path);
			return ret;
		}
		path->slots[0]++;
	}

	return 0;
}
1292
/*
 * Walk the extent tree and queue every tree-block extent for dumping.
 * Starts just past the primary superblock and scans EXTENT_ITEM /
 * METADATA_ITEM keys, adding those flagged as tree blocks (or, for the
 * legacy v0 format, those whose refs indicate a tree owner).
 *
 * Returns 0 on success or a negative errno.
 */
static int copy_from_extent_tree(struct metadump_struct *metadump,
				 struct btrfs_path *path)
{
	struct btrfs_root *extent_root;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_key key;
	u64 bytenr;
	u64 num_bytes;
	int ret;

	extent_root = metadump->root->fs_info->extent_root;
	/* skip the superblock itself; it is added separately by the caller */
	bytenr = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0) {
		error("extent root not found: %d", ret);
		return ret;
	}
	ret = 0;

	leaf = path->nodes[0];

	while (1) {
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0) {
				error("cannot go to next leaf %d", ret);
				break;
			}
			if (ret > 0) {
				/* end of the extent tree: success */
				ret = 0;
				break;
			}
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		/* skip already-covered ranges and non-extent keys */
		if (key.objectid < bytenr ||
		    (key.type != BTRFS_EXTENT_ITEM_KEY &&
		     key.type != BTRFS_METADATA_ITEM_KEY)) {
			path->slots[0]++;
			continue;
		}

		bytenr = key.objectid;
		if (key.type == BTRFS_METADATA_ITEM_KEY) {
			/* METADATA_ITEM offset is the level, not the size */
			num_bytes = extent_root->fs_info->nodesize;
		} else {
			num_bytes = key.offset;
		}

		if (num_bytes == 0) {
			error("extent length 0 at bytenr %llu key type %d",
					(unsigned long long)bytenr, key.type);
			ret = -EIO;
			break;
		}

		/* items larger than the bare extent item carry inline flags */
		if (btrfs_item_size_nr(leaf, path->slots[0]) > sizeof(*ei)) {
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_extent_item);
			if (btrfs_extent_flags(leaf, ei) &
			    BTRFS_EXTENT_FLAG_TREE_BLOCK) {
				ret = add_extent(bytenr, num_bytes, metadump,
						 0);
				if (ret) {
					error("unable to add block %llu: %d",
						(unsigned long long)bytenr, ret);
					break;
				}
			}
		} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
			/* v0 extent item: decide via the following ref items */
			ret = is_tree_block(extent_root, path, bytenr);
			if (ret < 0) {
				error("failed to check tree block %llu: %d",
					(unsigned long long)bytenr, ret);
				break;
			}

			if (ret) {
				ret = add_extent(bytenr, num_bytes, metadump,
						 0);
				if (ret) {
					error("unable to add block %llu: %d",
						(unsigned long long)bytenr, ret);
					break;
				}
			}
			ret = 0;
#else
			error(
	"either extent tree is corrupted or you haven't built with V0 support");
			ret = -EIO;
			break;
#endif
		}
		bytenr += num_bytes;
	}

	btrfs_release_path(path);

	return ret;
}
1401
1402 static int create_metadump(const char *input, FILE *out, int num_threads,
1403                            int compress_level, enum sanitize_mode sanitize,
1404                            int walk_trees)
1405 {
1406         struct btrfs_root *root;
1407         struct btrfs_path path;
1408         struct metadump_struct metadump;
1409         int ret;
1410         int err = 0;
1411
1412         root = open_ctree(input, 0, 0);
1413         if (!root) {
1414                 error("open ctree failed");
1415                 return -EIO;
1416         }
1417
1418         ret = metadump_init(&metadump, root, out, num_threads,
1419                             compress_level, sanitize);
1420         if (ret) {
1421                 error("failed to initialize metadump: %d", ret);
1422                 close_ctree(root);
1423                 return ret;
1424         }
1425
1426         ret = add_extent(BTRFS_SUPER_INFO_OFFSET, BTRFS_SUPER_INFO_SIZE,
1427                         &metadump, 0);
1428         if (ret) {
1429                 error("unable to add metadata: %d", ret);
1430                 err = ret;
1431                 goto out;
1432         }
1433
1434         btrfs_init_path(&path);
1435
1436         if (walk_trees) {
1437                 ret = copy_tree_blocks(root, root->fs_info->chunk_root->node,
1438                                        &metadump, 1);
1439                 if (ret) {
1440                         err = ret;
1441                         goto out;
1442                 }
1443
1444                 ret = copy_tree_blocks(root, root->fs_info->tree_root->node,
1445                                        &metadump, 1);
1446                 if (ret) {
1447                         err = ret;
1448                         goto out;
1449                 }
1450         } else {
1451                 ret = copy_from_extent_tree(&metadump, &path);
1452                 if (ret) {
1453                         err = ret;
1454                         goto out;
1455                 }
1456         }
1457
1458         ret = copy_log_trees(root, &metadump);
1459         if (ret) {
1460                 err = ret;
1461                 goto out;
1462         }
1463
1464         ret = copy_space_cache(root, &metadump, &path);
1465 out:
1466         ret = flush_pending(&metadump, 1);
1467         if (ret) {
1468                 if (!err)
1469                         err = ret;
1470                 error("failed to flush pending data: %d", ret);
1471         }
1472
1473         metadump_destroy(&metadump, num_threads);
1474
1475         btrfs_release_path(&path);
1476         ret = close_ctree(root);
1477         return err ? err : ret;
1478 }
1479
1480 static void update_super_old(u8 *buffer)
1481 {
1482         struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
1483         struct btrfs_chunk *chunk;
1484         struct btrfs_disk_key *key;
1485         u32 sectorsize = btrfs_super_sectorsize(super);
1486         u64 flags = btrfs_super_flags(super);
1487
1488         flags |= BTRFS_SUPER_FLAG_METADUMP;
1489         btrfs_set_super_flags(super, flags);
1490
1491         key = (struct btrfs_disk_key *)(super->sys_chunk_array);
1492         chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
1493                                        sizeof(struct btrfs_disk_key));
1494
1495         btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
1496         btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
1497         btrfs_set_disk_key_offset(key, 0);
1498
1499         btrfs_set_stack_chunk_length(chunk, (u64)-1);
1500         btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
1501         btrfs_set_stack_chunk_stripe_len(chunk, BTRFS_STRIPE_LEN);
1502         btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
1503         btrfs_set_stack_chunk_io_align(chunk, sectorsize);
1504         btrfs_set_stack_chunk_io_width(chunk, sectorsize);
1505         btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
1506         btrfs_set_stack_chunk_num_stripes(chunk, 1);
1507         btrfs_set_stack_chunk_sub_stripes(chunk, 0);
1508         chunk->stripe.devid = super->dev_item.devid;
1509         btrfs_set_stack_stripe_offset(&chunk->stripe, 0);
1510         memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
1511         btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
1512         csum_block(buffer, BTRFS_SUPER_INFO_SIZE);
1513 }
1514
/*
 * Rewrite a restored superblock's system chunk array in place: each chunk
 * is collapsed to a single stripe (two for DUP) on the lone target device,
 * with its physical offset remapped via the chunk tree built during
 * restore.  Marks the super as METADUMP_V2 and re-checksums it.
 *
 * Returns 0 on success or -EIO on a corrupt sys array.
 */
static int update_super(struct mdrestore_struct *mdres, u8 *buffer)
{
	struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
	struct btrfs_chunk *chunk;
	struct btrfs_disk_key *disk_key;
	struct btrfs_key key;
	u64 flags = btrfs_super_flags(super);
	u32 new_array_size = 0;
	u32 array_size;
	u32 cur = 0;
	u8 *ptr, *write_ptr;
	int old_num_stripes;

	/* read with ptr, compact in place through write_ptr */
	write_ptr = ptr = super->sys_chunk_array;
	array_size = btrfs_super_sys_array_size(super);

	while (cur < array_size) {
		disk_key = (struct btrfs_disk_key *)ptr;
		btrfs_disk_key_to_cpu(&key, disk_key);

		new_array_size += sizeof(*disk_key);
		memmove(write_ptr, ptr, sizeof(*disk_key));

		write_ptr += sizeof(*disk_key);
		ptr += sizeof(*disk_key);
		cur += sizeof(*disk_key);

		if (key.type == BTRFS_CHUNK_ITEM_KEY) {
			u64 type, physical, physical_dup, size = 0;

			/* stripe count must be read before compacting */
			chunk = (struct btrfs_chunk *)ptr;
			old_num_stripes = btrfs_stack_chunk_num_stripes(chunk);
			chunk = (struct btrfs_chunk *)write_ptr;

			memmove(write_ptr, ptr, sizeof(*chunk));
			btrfs_set_stack_chunk_sub_stripes(chunk, 0);
			type = btrfs_stack_chunk_type(chunk);
			if (type & BTRFS_BLOCK_GROUP_DUP) {
				/* DUP keeps its second stripe */
				new_array_size += sizeof(struct btrfs_stripe);
				write_ptr += sizeof(struct btrfs_stripe);
			} else {
				btrfs_set_stack_chunk_num_stripes(chunk, 1);
				btrfs_set_stack_chunk_type(chunk,
						BTRFS_BLOCK_GROUP_SYSTEM);
			}
			chunk->stripe.devid = super->dev_item.devid;
			/* remap logical chunk start to restored physical */
			physical = logical_to_physical(mdres, key.offset,
						       &size, &physical_dup);
			if (size != (u64)-1)
				btrfs_set_stack_stripe_offset(&chunk->stripe,
							      physical);
			memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid,
			       BTRFS_UUID_SIZE);
			new_array_size += sizeof(*chunk);
		} else {
			error("bogus key in the sys array %d", key.type);
			return -EIO;
		}
		/* sizeof(*chunk) covers the first stripe; DUP advanced extra */
		write_ptr += sizeof(*chunk);
		ptr += btrfs_chunk_item_size(old_num_stripes);
		cur += btrfs_chunk_item_size(old_num_stripes);
	}

	if (mdres->clear_space_cache)
		btrfs_set_super_cache_generation(super, 0);

	flags |= BTRFS_SUPER_FLAG_METADUMP_V2;
	btrfs_set_super_flags(super, flags);
	btrfs_set_super_sys_array_size(super, new_array_size);
	btrfs_set_super_num_devices(super, 1);
	csum_block(buffer, BTRFS_SUPER_INFO_SIZE);

	return 0;
}
1589
1590 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size)
1591 {
1592         struct extent_buffer *eb;
1593
1594         eb = calloc(1, sizeof(struct extent_buffer) + size);
1595         if (!eb)
1596                 return NULL;
1597
1598         eb->start = bytenr;
1599         eb->len = size;
1600         return eb;
1601 }
1602
/*
 * Shrink the item in @slot of leaf @eb to @new_size bytes in place.
 * Item data in a btrfs leaf grows from the end of the block toward the
 * header, so shrinking an item means sliding all data at lower offsets
 * (items after @slot) up by the size difference and bumping their
 * recorded offsets.  No-op when the size is unchanged.
 */
static void truncate_item(struct extent_buffer *eb, int slot, u32 new_size)
{
	struct btrfs_item *item;
	u32 nritems;
	u32 old_size;
	u32 old_data_start;
	u32 size_diff;
	u32 data_end;
	int i;

	old_size = btrfs_item_size_nr(eb, slot);
	if (old_size == new_size)
		return;

	nritems = btrfs_header_nritems(eb);
	/* the last item's offset is the lowest used data offset */
	data_end = btrfs_item_offset_nr(eb, nritems - 1);

	old_data_start = btrfs_item_offset_nr(eb, slot);
	size_diff = old_size - new_size;

	/* shift the recorded offsets of this item and all items after it */
	for (i = slot; i < nritems; i++) {
		u32 ioff;
		item = btrfs_item_nr(i);
		ioff = btrfs_item_offset(eb, item);
		btrfs_set_item_offset(eb, item, ioff + size_diff);
	}

	/* slide the data region up over the freed bytes */
	memmove_extent_buffer(eb, btrfs_leaf_data(eb) + data_end + size_diff,
			      btrfs_leaf_data(eb) + data_end,
			      old_data_start + new_size - data_end);
	item = btrfs_item_nr(slot);
	btrfs_set_item_size(eb, item, new_size);
}
1636
/*
 * Rewrite chunk tree leaves inside a restored buffer so they describe a
 * single-device filesystem.
 *
 * @buffer holds @size bytes starting at disk offset async->start.  Each
 * nodesize-sized block that looks like a chunk tree leaf (bytenr, fsid
 * and owner all match) has its chunk items collapsed to a single stripe
 * (two stripes are kept for DUP chunks), pointed at the restore target
 * device, and is then re-checksummed.  Returns 0 on success or -ENOMEM.
 */
static int fixup_chunk_tree_block(struct mdrestore_struct *mdres,
                                  struct async_work *async, u8 *buffer,
                                  size_t size)
{
        struct extent_buffer *eb;
        size_t size_left = size;
        u64 bytenr = async->start;
        int i;

        /*
         * NOTE(review): a buffer whose size is not a multiple of
         * nodesize is silently treated as "nothing to fix up" --
         * presumably such buffers cannot contain tree blocks; confirm.
         */
        if (size_left % mdres->nodesize)
                return 0;

        eb = alloc_dummy_eb(bytenr, mdres->nodesize);
        if (!eb)
                return -ENOMEM;

        while (size_left) {
                eb->start = bytenr;
                memcpy(eb->data, buffer, mdres->nodesize);

                /* Not a tree block for this fs: stop scanning. */
                if (btrfs_header_bytenr(eb) != bytenr)
                        break;
                if (memcmp(mdres->fsid,
                           eb->data + offsetof(struct btrfs_header, fsid),
                           BTRFS_FSID_SIZE))
                        break;

                /* Only chunk tree leaves need patching. */
                if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID)
                        goto next;

                if (btrfs_header_level(eb) != 0)
                        goto next;

                for (i = 0; i < btrfs_header_nritems(eb); i++) {
                        struct btrfs_chunk *chunk;
                        struct btrfs_key key;
                        /* note: this "size" shadows the parameter */
                        u64 type, physical, physical_dup, size = (u64)-1;

                        btrfs_item_key_to_cpu(eb, &key, i);
                        if (key.type != BTRFS_CHUNK_ITEM_KEY)
                                continue;

                        size = 0;
                        physical = logical_to_physical(mdres, key.offset,
                                                       &size, &physical_dup);

                        /* Drop extra stripes (DUP keeps its second one). */
                        if (!physical_dup)
                                truncate_item(eb, i, sizeof(*chunk));
                        chunk = btrfs_item_ptr(eb, i, struct btrfs_chunk);


                        /* Zero out the RAID profile */
                        type = btrfs_chunk_type(eb, chunk);
                        type &= (BTRFS_BLOCK_GROUP_DATA |
                                 BTRFS_BLOCK_GROUP_SYSTEM |
                                 BTRFS_BLOCK_GROUP_METADATA |
                                 BTRFS_BLOCK_GROUP_DUP);
                        btrfs_set_chunk_type(eb, chunk, type);

                        if (!physical_dup)
                                btrfs_set_chunk_num_stripes(eb, chunk, 1);
                        btrfs_set_chunk_sub_stripes(eb, chunk, 0);
                        btrfs_set_stripe_devid_nr(eb, chunk, 0, mdres->devid);
                        /* size == -1 means logical_to_physical found no map */
                        if (size != (u64)-1)
                                btrfs_set_stripe_offset_nr(eb, chunk, 0,
                                                           physical);
                        /* update stripe 2 offset */
                        if (physical_dup)
                                btrfs_set_stripe_offset_nr(eb, chunk, 1,
                                                           physical_dup);

                        write_extent_buffer(eb, mdres->uuid,
                                        (unsigned long)btrfs_stripe_dev_uuid_nr(
                                                chunk, 0),
                                        BTRFS_UUID_SIZE);
                }
                /* Copy the patched block back and refresh its checksum. */
                memcpy(buffer, eb->data, eb->len);
                csum_block(buffer, eb->len);
next:
                size_left -= mdres->nodesize;
                buffer += mdres->nodesize;
                bytenr += mdres->nodesize;
        }

        free(eb);
        return 0;
}
1724
1725 static void write_backup_supers(int fd, u8 *buf)
1726 {
1727         struct btrfs_super_block *super = (struct btrfs_super_block *)buf;
1728         struct stat st;
1729         u64 size;
1730         u64 bytenr;
1731         int i;
1732         int ret;
1733
1734         if (fstat(fd, &st)) {
1735                 error(
1736         "cannot stat restore point, won't be able to write backup supers: %s",
1737                         strerror(errno));
1738                 return;
1739         }
1740
1741         size = btrfs_device_size(fd, &st);
1742
1743         for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1744                 bytenr = btrfs_sb_offset(i);
1745                 if (bytenr + BTRFS_SUPER_INFO_SIZE > size)
1746                         break;
1747                 btrfs_set_super_bytenr(super, bytenr);
1748                 csum_block(buf, BTRFS_SUPER_INFO_SIZE);
1749                 ret = pwrite64(fd, buf, BTRFS_SUPER_INFO_SIZE, bytenr);
1750                 if (ret < BTRFS_SUPER_INFO_SIZE) {
1751                         if (ret < 0)
1752                                 error(
1753                                 "problem writing out backup super block %d: %s",
1754                                                 i, strerror(errno));
1755                         else
1756                                 error("short write writing out backup super block");
1757                         break;
1758                 }
1759         }
1760 }
1761
/*
 * Restore worker thread body.
 *
 * Pulls async_work items off mdres->list, undoes zlib compression if
 * needed, fixes up super and chunk tree blocks for single-device
 * restore, then writes the data to the restore target (directly, or via
 * write_data_to_disk() in fixup_offset mode).  Errors are accumulated
 * in mdres->error under mdres->mutex; the thread exits once mdres->done
 * is set and the work list is empty.
 */
static void *restore_worker(void *data)
{
        struct mdrestore_struct *mdres = (struct mdrestore_struct *)data;
        struct async_work *async;
        size_t size;
        u8 *buffer;
        u8 *outbuf;
        int outfd;
        int ret;
        int compress_size = MAX_PENDING_SIZE * 4;

        outfd = fileno(mdres->out);
        buffer = malloc(compress_size);
        if (!buffer) {
                error("not enough memory for restore worker buffer");
                pthread_mutex_lock(&mdres->mutex);
                if (!mdres->error)
                        mdres->error = -ENOMEM;
                pthread_mutex_unlock(&mdres->mutex);
                pthread_exit(NULL);
        }

        while (1) {
                u64 bytenr, physical_dup;
                off_t offset = 0;
                int err = 0;

                pthread_mutex_lock(&mdres->mutex);
                /*
                 * Wait until work is queued AND nodesize is known;
                 * nodesize == 0 means fill_mdres_info() has not yet seen
                 * the super block.
                 */
                while (!mdres->nodesize || list_empty(&mdres->list)) {
                        if (mdres->done) {
                                pthread_mutex_unlock(&mdres->mutex);
                                goto out;
                        }
                        pthread_cond_wait(&mdres->cond, &mdres->mutex);
                }
                async = list_entry(mdres->list.next, struct async_work, list);
                list_del_init(&async->list);

                if (mdres->compress_method == COMPRESS_ZLIB) {
                        size = compress_size;
                        /* drop the lock around the expensive inflate */
                        pthread_mutex_unlock(&mdres->mutex);
                        ret = uncompress(buffer, (unsigned long *)&size,
                                         async->buffer, async->bufsize);
                        pthread_mutex_lock(&mdres->mutex);
                        if (ret != Z_OK) {
                                error("decompression failed with %d", ret);
                                err = -EIO;
                        }
                        outbuf = buffer;
                } else {
                        outbuf = async->buffer;
                        size = async->bufsize;
                }

                if (!mdres->multi_devices) {
                        if (async->start == BTRFS_SUPER_INFO_OFFSET) {
                                if (mdres->old_restore) {
                                        update_super_old(outbuf);
                                } else {
                                        ret = update_super(mdres, outbuf);
                                        if (ret)
                                                err = ret;
                                }
                        } else if (!mdres->old_restore) {
                                ret = fixup_chunk_tree_block(mdres, async, outbuf, size);
                                if (ret)
                                        err = ret;
                        }
                }

                if (!mdres->fixup_offset) {
                        /* Write each chunk-mapped piece of the buffer. */
                        while (size) {
                                u64 chunk_size = size;
                                physical_dup = 0;
                                if (!mdres->multi_devices && !mdres->old_restore)
                                        bytenr = logical_to_physical(mdres,
                                                     async->start + offset,
                                                     &chunk_size,
                                                     &physical_dup);
                                else
                                        bytenr = async->start + offset;

                                ret = pwrite64(outfd, outbuf+offset, chunk_size,
                                               bytenr);
                                if (ret != chunk_size)
                                        goto error;

                                /* DUP chunks get a second copy. */
                                if (physical_dup)
                                        ret = pwrite64(outfd, outbuf+offset,
                                                       chunk_size,
                                                       physical_dup);
                                /*
                                 * When physical_dup is 0 this re-checks the
                                 * first write's result, which is harmless.
                                 */
                                if (ret != chunk_size)
                                        goto error;

                                size -= chunk_size;
                                offset += chunk_size;
                                continue;

error:
                                if (ret < 0) {
                                        error("unable to write to device: %s",
                                                        strerror(errno));
                                        /*
                                         * NOTE(review): stores a positive
                                         * errno while other paths use
                                         * negative codes (-EIO); callers only
                                         * test for non-zero -- confirm.
                                         */
                                        err = errno;
                                } else {
                                        error("short write");
                                        err = -EIO;
                                }
                        }
                } else if (async->start != BTRFS_SUPER_INFO_OFFSET) {
                        ret = write_data_to_disk(mdres->info, outbuf, async->start, size, 0);
                        if (ret) {
                                error("failed to write data");
                                exit(1);
                        }
                }


                /* backup super blocks are already there at fixup_offset stage */
                if (!mdres->multi_devices && async->start == BTRFS_SUPER_INFO_OFFSET)
                        write_backup_supers(outfd, outbuf);

                if (err && !mdres->error)
                        mdres->error = err;
                mdres->num_items--;
                pthread_mutex_unlock(&mdres->mutex);

                free(async->buffer);
                free(async);
        }
out:
        free(buffer);
        pthread_exit(NULL);
}
1895
1896 static void mdrestore_destroy(struct mdrestore_struct *mdres, int num_threads)
1897 {
1898         struct rb_node *n;
1899         int i;
1900
1901         while ((n = rb_first(&mdres->chunk_tree))) {
1902                 struct fs_chunk *entry;
1903
1904                 entry = rb_entry(n, struct fs_chunk, l);
1905                 rb_erase(n, &mdres->chunk_tree);
1906                 rb_erase(&entry->p, &mdres->physical_tree);
1907                 free(entry);
1908         }
1909         pthread_mutex_lock(&mdres->mutex);
1910         mdres->done = 1;
1911         pthread_cond_broadcast(&mdres->cond);
1912         pthread_mutex_unlock(&mdres->mutex);
1913
1914         for (i = 0; i < num_threads; i++)
1915                 pthread_join(mdres->threads[i], NULL);
1916
1917         pthread_cond_destroy(&mdres->cond);
1918         pthread_mutex_destroy(&mdres->mutex);
1919 }
1920
1921 static int mdrestore_init(struct mdrestore_struct *mdres,
1922                           FILE *in, FILE *out, int old_restore,
1923                           int num_threads, int fixup_offset,
1924                           struct btrfs_fs_info *info, int multi_devices)
1925 {
1926         int i, ret = 0;
1927
1928         memset(mdres, 0, sizeof(*mdres));
1929         pthread_cond_init(&mdres->cond, NULL);
1930         pthread_mutex_init(&mdres->mutex, NULL);
1931         INIT_LIST_HEAD(&mdres->list);
1932         INIT_LIST_HEAD(&mdres->overlapping_chunks);
1933         mdres->in = in;
1934         mdres->out = out;
1935         mdres->old_restore = old_restore;
1936         mdres->chunk_tree.rb_node = NULL;
1937         mdres->fixup_offset = fixup_offset;
1938         mdres->info = info;
1939         mdres->multi_devices = multi_devices;
1940         mdres->clear_space_cache = 0;
1941         mdres->last_physical_offset = 0;
1942         mdres->alloced_chunks = 0;
1943
1944         if (!num_threads)
1945                 return 0;
1946
1947         mdres->num_threads = num_threads;
1948         for (i = 0; i < num_threads; i++) {
1949                 ret = pthread_create(&mdres->threads[i], NULL, restore_worker,
1950                                      mdres);
1951                 if (ret) {
1952                         /* pthread_create returns errno directly */
1953                         ret = -ret;
1954                         break;
1955                 }
1956         }
1957         if (ret)
1958                 mdrestore_destroy(mdres, i + 1);
1959         return ret;
1960 }
1961
/*
 * Seed restore state from the super block payload carried by @async.
 *
 * The first super block item seen supplies the nodesize, fsid, device
 * uuid and devid that the chunk fixup code relies on; once nodesize is
 * set, further calls are no-ops.  The payload is inflated first when
 * the image is zlib-compressed.  Called with mdres->mutex held by its
 * only caller.  Returns 0 on success, -ENOMEM or -EIO on failure.
 */
static int fill_mdres_info(struct mdrestore_struct *mdres,
			   struct async_work *async)
{
	struct btrfs_super_block *super;
	u8 *scratch = NULL;
	u8 *sb_buf;
	int ret;

	/* We've already been initialized */
	if (mdres->nodesize)
		return 0;

	if (mdres->compress_method != COMPRESS_ZLIB) {
		sb_buf = async->buffer;
	} else {
		size_t size = MAX_PENDING_SIZE * 2;

		scratch = malloc(MAX_PENDING_SIZE * 2);
		if (!scratch)
			return -ENOMEM;
		ret = uncompress(scratch, (unsigned long *)&size,
				 async->buffer, async->bufsize);
		if (ret != Z_OK) {
			error("decompression failed with %d", ret);
			free(scratch);
			return -EIO;
		}
		sb_buf = scratch;
	}

	super = (struct btrfs_super_block *)sb_buf;
	mdres->nodesize = btrfs_super_nodesize(super);
	memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
	memcpy(mdres->uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
	mdres->devid = le64_to_cpu(super->dev_item.devid);
	free(scratch);
	return 0;
}
2001
/*
 * Read one cluster's item payloads from the image stream and queue them
 * for the restore workers.
 *
 * @cluster's header has already been read by the caller.  Each item's
 * payload is read from mdres->in into a fresh async_work and appended
 * to mdres->list; the super block item additionally seeds the restore
 * state via fill_mdres_info().  Trailing padding up to the next
 * BLOCK_SIZE boundary is consumed, and *next receives the stream offset
 * of the following cluster.  Returns 0 on success or a negative errno
 * value.
 */
static int add_cluster(struct meta_cluster *cluster,
                       struct mdrestore_struct *mdres, u64 *next)
{
        struct meta_cluster_item *item;
        struct meta_cluster_header *header = &cluster->header;
        struct async_work *async;
        u64 bytenr;
        u32 i, nritems;
        int ret;

        /* Workers read compress_method; publish it under the lock. */
        pthread_mutex_lock(&mdres->mutex);
        mdres->compress_method = header->compress;
        pthread_mutex_unlock(&mdres->mutex);

        bytenr = le64_to_cpu(header->bytenr) + BLOCK_SIZE;
        nritems = le32_to_cpu(header->nritems);
        for (i = 0; i < nritems; i++) {
                item = &cluster->items[i];
                async = calloc(1, sizeof(*async));
                if (!async) {
                        error("not enough memory for async data");
                        return -ENOMEM;
                }
                async->start = le64_to_cpu(item->bytenr);
                async->bufsize = le32_to_cpu(item->size);
                async->buffer = malloc(async->bufsize);
                if (!async->buffer) {
                        error("not enough memory for async buffer");
                        free(async);
                        return -ENOMEM;
                }
                ret = fread(async->buffer, async->bufsize, 1, mdres->in);
                if (ret != 1) {
                        error("unable to read buffer: %s", strerror(errno));
                        free(async->buffer);
                        free(async);
                        return -EIO;
                }
                bytenr += async->bufsize;

                pthread_mutex_lock(&mdres->mutex);
                /* The super block must set up nodesize/fsid first. */
                if (async->start == BTRFS_SUPER_INFO_OFFSET) {
                        ret = fill_mdres_info(mdres, async);
                        if (ret) {
                                error("unable to set up restore state");
                                pthread_mutex_unlock(&mdres->mutex);
                                free(async->buffer);
                                free(async);
                                return ret;
                        }
                }
                list_add_tail(&async->list, &mdres->list);
                mdres->num_items++;
                pthread_cond_signal(&mdres->cond);
                pthread_mutex_unlock(&mdres->mutex);
        }
        /* Skip padding to the next BLOCK_SIZE-aligned cluster start. */
        if (bytenr & BLOCK_MASK) {
                /* size below is at most BLOCK_SIZE - 1 == BLOCK_MASK */
                char buffer[BLOCK_MASK];
                size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);

                bytenr += size;
                ret = fread(buffer, size, 1, mdres->in);
                if (ret != 1) {
                        error("failed to read buffer: %s", strerror(errno));
                        return -EIO;
                }
        }
        *next = bytenr;
        return 0;
}
2072
2073 static int wait_for_worker(struct mdrestore_struct *mdres)
2074 {
2075         int ret = 0;
2076
2077         pthread_mutex_lock(&mdres->mutex);
2078         ret = mdres->error;
2079         while (!ret && mdres->num_items > 0) {
2080                 struct timespec ts = {
2081                         .tv_sec = 0,
2082                         .tv_nsec = 10000000,
2083                 };
2084                 pthread_mutex_unlock(&mdres->mutex);
2085                 nanosleep(&ts, NULL);
2086                 pthread_mutex_lock(&mdres->mutex);
2087                 ret = mdres->error;
2088         }
2089         pthread_mutex_unlock(&mdres->mutex);
2090         return ret;
2091 }
2092
/*
 * Parse a chunk tree block found in an image item payload and record
 * its chunk mappings.
 *
 * @buffer is a payload whose first block lives at disk offset
 * @item_bytenr; the block of interest sits at @bytenr within it.  Leaf
 * chunk items are inserted into mdres->chunk_tree (keyed by logical
 * address) and mdres->physical_tree (keyed by physical address), while
 * interior nodes recurse through search_for_chunk_blocks() starting
 * from @cluster_bytenr.  Returns 0 on success or a negative errno
 * value.
 */
static int read_chunk_block(struct mdrestore_struct *mdres, u8 *buffer,
                            u64 bytenr, u64 item_bytenr, u32 bufsize,
                            u64 cluster_bytenr)
{
        struct extent_buffer *eb;
        int ret = 0;
        int i;

        eb = alloc_dummy_eb(bytenr, mdres->nodesize);
        if (!eb) {
                ret = -ENOMEM;
                goto out;
        }

        /* Step forward to the block matching @bytenr inside the payload. */
        while (item_bytenr != bytenr) {
                buffer += mdres->nodesize;
                item_bytenr += mdres->nodesize;
        }

        memcpy(eb->data, buffer, mdres->nodesize);
        if (btrfs_header_bytenr(eb) != bytenr) {
                error("eb bytenr does not match found bytenr: %llu != %llu",
                                (unsigned long long)btrfs_header_bytenr(eb),
                                (unsigned long long)bytenr);
                ret = -EIO;
                goto out;
        }

        if (memcmp(mdres->fsid, eb->data + offsetof(struct btrfs_header, fsid),
                   BTRFS_FSID_SIZE)) {
                error("filesystem UUID of eb %llu does not match",
                                (unsigned long long)bytenr);
                ret = -EIO;
                goto out;
        }

        if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID) {
                error("wrong eb %llu owner %llu",
                                (unsigned long long)bytenr,
                                (unsigned long long)btrfs_header_owner(eb));
                ret = -EIO;
                goto out;
        }

        for (i = 0; i < btrfs_header_nritems(eb); i++) {
                struct btrfs_chunk *chunk;
                struct fs_chunk *fs_chunk;
                struct btrfs_key key;
                u64 type;

                /* Interior node: chase each child block pointer. */
                if (btrfs_header_level(eb)) {
                        u64 blockptr = btrfs_node_blockptr(eb, i);

                        ret = search_for_chunk_blocks(mdres, blockptr,
                                                      cluster_bytenr);
                        if (ret)
                                break;
                        continue;
                }

                /* Yay a leaf!  We loves leafs! */
                btrfs_item_key_to_cpu(eb, &key, i);
                if (key.type != BTRFS_CHUNK_ITEM_KEY)
                        continue;

                fs_chunk = malloc(sizeof(struct fs_chunk));
                if (!fs_chunk) {
                        error("not enough memory to allocate chunk");
                        ret = -ENOMEM;
                        break;
                }
                memset(fs_chunk, 0, sizeof(*fs_chunk));
                chunk = btrfs_item_ptr(eb, i, struct btrfs_chunk);

                fs_chunk->logical = key.offset;
                fs_chunk->physical = btrfs_stripe_offset_nr(eb, chunk, 0);
                fs_chunk->bytes = btrfs_chunk_length(eb, chunk);
                INIT_LIST_HEAD(&fs_chunk->list);
                /* Physically overlapping chunks go on a side list instead. */
                if (tree_search(&mdres->physical_tree, &fs_chunk->p,
                                physical_cmp, 1) != NULL)
                        list_add(&fs_chunk->list, &mdres->overlapping_chunks);
                else
                        tree_insert(&mdres->physical_tree, &fs_chunk->p,
                                    physical_cmp);

                type = btrfs_chunk_type(eb, chunk);
                if (type & BTRFS_BLOCK_GROUP_DUP) {
                        fs_chunk->physical_dup =
                                        btrfs_stripe_offset_nr(eb, chunk, 1);
                }

                /* Track the highest physical end offset seen so far. */
                if (fs_chunk->physical_dup + fs_chunk->bytes >
                    mdres->last_physical_offset)
                        mdres->last_physical_offset = fs_chunk->physical_dup +
                                fs_chunk->bytes;
                else if (fs_chunk->physical + fs_chunk->bytes >
                    mdres->last_physical_offset)
                        mdres->last_physical_offset = fs_chunk->physical +
                                fs_chunk->bytes;
                mdres->alloced_chunks += fs_chunk->bytes;
                /* in dup case, fs_chunk->bytes should add twice */
                if (fs_chunk->physical_dup)
                        mdres->alloced_chunks += fs_chunk->bytes;
                tree_insert(&mdres->chunk_tree, &fs_chunk->l, chunk_cmp);
        }
out:
        free(eb);
        return ret;
}
2202
/* If you have to ask you aren't worthy */
/*
 * Scan the image stream for the tree block at logical address @search
 * and hand it to read_chunk_block().
 *
 * Scanning starts at cluster offset @cluster_bytenr and restarts from
 * the beginning of the image when the end of the stream is hit.  Item
 * payloads are inflated as needed when the image is zlib-compressed.
 * Returns 0 when the block was found and parsed (read_chunk_block's 1
 * is folded back to 0), or a negative errno value on error.
 */
static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
                                   u64 search, u64 cluster_bytenr)
{
        struct meta_cluster *cluster;
        struct meta_cluster_header *header;
        struct meta_cluster_item *item;
        u64 current_cluster = cluster_bytenr, bytenr;
        u64 item_bytenr;
        u32 bufsize, nritems, i;
        u32 max_size = MAX_PENDING_SIZE * 2;
        u8 *buffer, *tmp = NULL;
        int ret = 0;

        cluster = malloc(BLOCK_SIZE);
        if (!cluster) {
                error("not enough memory for cluster");
                return -ENOMEM;
        }

        buffer = malloc(max_size);
        if (!buffer) {
                error("not enough memory for buffer");
                free(cluster);
                return -ENOMEM;
        }

        /* Scratch space for the compressed payload before inflating. */
        if (mdres->compress_method == COMPRESS_ZLIB) {
                tmp = malloc(max_size);
                if (!tmp) {
                        error("not enough memory for buffer");
                        free(cluster);
                        free(buffer);
                        return -ENOMEM;
                }
        }

        bytenr = current_cluster;
        while (1) {
                if (fseek(mdres->in, current_cluster, SEEK_SET)) {
                        error("seek failed: %s", strerror(errno));
                        ret = -EIO;
                        break;
                }

                ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
                if (ret == 0) {
                        /* Hit EOF: wrap around to the start of the image once. */
                        if (cluster_bytenr != 0) {
                                cluster_bytenr = 0;
                                current_cluster = 0;
                                bytenr = 0;
                                continue;
                        }
                        error(
        "unknown state after reading cluster at %llu, probably corrupted data",
                                        cluster_bytenr);
                        ret = -EIO;
                        break;
                } else if (ret < 0) {
                        /*
                         * NOTE(review): fread() never returns a negative
                         * value, so this branch looks unreachable -- confirm.
                         */
                        error("unable to read image at %llu: %s",
                                        (unsigned long long)cluster_bytenr,
                                        strerror(errno));
                        break;
                }
                ret = 0;

                header = &cluster->header;
                if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
                    le64_to_cpu(header->bytenr) != current_cluster) {
                        error("bad header in metadump image");
                        ret = -EIO;
                        break;
                }

                bytenr += BLOCK_SIZE;
                nritems = le32_to_cpu(header->nritems);
                for (i = 0; i < nritems; i++) {
                        size_t size;

                        item = &cluster->items[i];
                        bufsize = le32_to_cpu(item->size);
                        item_bytenr = le64_to_cpu(item->bytenr);

                        if (bufsize > max_size) {
                                error("item %u too big: %u > %u", i, bufsize,
                                                max_size);
                                ret = -EIO;
                                break;
                        }

                        if (mdres->compress_method == COMPRESS_ZLIB) {
                                ret = fread(tmp, bufsize, 1, mdres->in);
                                if (ret != 1) {
                                        error("read error: %s", strerror(errno));
                                        ret = -EIO;
                                        break;
                                }

                                size = max_size;
                                ret = uncompress(buffer,
                                                 (unsigned long *)&size, tmp,
                                                 bufsize);
                                if (ret != Z_OK) {
                                        error("decompression failed with %d",
                                                        ret);
                                        ret = -EIO;
                                        break;
                                }
                        } else {
                                ret = fread(buffer, bufsize, 1, mdres->in);
                                if (ret != 1) {
                                        error("read error: %s",
                                                        strerror(errno));
                                        ret = -EIO;
                                        break;
                                }
                                size = bufsize;
                        }
                        ret = 0;

                        /* Does this item's disk range cover @search? */
                        if (item_bytenr <= search &&
                            item_bytenr + size > search) {
                                ret = read_chunk_block(mdres, buffer, search,
                                                       item_bytenr, size,
                                                       current_cluster);
                                if (!ret)
                                        ret = 1;
                                break;
                        }
                        bytenr += bufsize;
                }
                if (ret) {
                        /* ret > 0 means "found": report success. */
                        if (ret > 0)
                                ret = 0;
                        break;
                }
                /* Clusters are BLOCK_SIZE aligned; skip the padding. */
                if (bytenr & BLOCK_MASK)
                        bytenr += BLOCK_SIZE - (bytenr & BLOCK_MASK);
                current_cluster = bytenr;
        }

        free(tmp);
        free(buffer);
        free(cluster);
        return ret;
}
2349
/*
 * Read the first cluster of the metadump image, locate the item holding the
 * superblock, decompress it if needed, and start searching for the chunk
 * tree blocks rooted at the superblock's chunk root.
 *
 * Returns 0 on success (or unconditionally for stdin input, which cannot be
 * seeked and therefore skips this step), negative errno value on failure.
 */
static int build_chunk_tree(struct mdrestore_struct *mdres,
			    struct meta_cluster *cluster)
{
	struct btrfs_super_block *super;
	struct meta_cluster_header *header;
	struct meta_cluster_item *item = NULL;
	u64 chunk_root_bytenr = 0;
	u32 i, nritems;
	u64 bytenr = 0;
	u8 *buffer;
	int ret;

	/* We can't seek with stdin so don't bother doing this */
	if (mdres->in == stdin)
		return 0;

	ret = fread(cluster, BLOCK_SIZE, 1, mdres->in);
	if (ret <= 0) {
		error("unable to read cluster: %s", strerror(errno));
		return -EIO;
	}
	ret = 0;

	header = &cluster->header;
	if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
	    le64_to_cpu(header->bytenr) != 0) {
		error("bad header in metadump image");
		return -EIO;
	}

	bytenr += BLOCK_SIZE;
	mdres->compress_method = header->compress;
	nritems = le32_to_cpu(header->nritems);

	/* Skip over items until the one that carries the superblock */
	for (i = 0; i < nritems; i++) {
		item = &cluster->items[i];

		if (le64_to_cpu(item->bytenr) == BTRFS_SUPER_INFO_OFFSET)
			break;
		bytenr += le32_to_cpu(item->size);
		if (fseek(mdres->in, le32_to_cpu(item->size), SEEK_CUR)) {
			error("seek failed: %s", strerror(errno));
			return -EIO;
		}
	}

	if (!item || le64_to_cpu(item->bytenr) != BTRFS_SUPER_INFO_OFFSET) {
		/*
		 * item is still NULL when the cluster contained no items at
		 * all (nritems == 0), so it must not be dereferenced for the
		 * error message in that case.
		 */
		if (item)
			error("did not find superblock at %llu",
					le64_to_cpu(item->bytenr));
		else
			error("did not find superblock");
		return -EINVAL;
	}

	buffer = malloc(le32_to_cpu(item->size));
	if (!buffer) {
		error("not enough memory to allocate buffer");
		return -ENOMEM;
	}

	ret = fread(buffer, le32_to_cpu(item->size), 1, mdres->in);
	if (ret != 1) {
		error("unable to read buffer: %s", strerror(errno));
		free(buffer);
		return -EIO;
	}

	if (mdres->compress_method == COMPRESS_ZLIB) {
		size_t size = MAX_PENDING_SIZE * 2;
		u8 *tmp;

		tmp = malloc(MAX_PENDING_SIZE * 2);
		if (!tmp) {
			free(buffer);
			return -ENOMEM;
		}
		ret = uncompress(tmp, (unsigned long *)&size,
				 buffer, le32_to_cpu(item->size));
		if (ret != Z_OK) {
			error("decompression failed with %d", ret);
			free(buffer);
			free(tmp);
			return -EIO;
		}
		free(buffer);
		buffer = tmp;
	}

	/* Publish fs identity read from the superblock to the restore state */
	pthread_mutex_lock(&mdres->mutex);
	super = (struct btrfs_super_block *)buffer;
	chunk_root_bytenr = btrfs_super_chunk_root(super);
	mdres->nodesize = btrfs_super_nodesize(super);
	memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
	memcpy(mdres->uuid, super->dev_item.uuid,
		       BTRFS_UUID_SIZE);
	mdres->devid = le64_to_cpu(super->dev_item.devid);
	free(buffer);
	pthread_mutex_unlock(&mdres->mutex);

	return search_for_chunk_blocks(mdres, chunk_root_bytenr, 0);
}
2448
2449 static int range_contains_super(u64 physical, u64 bytes)
2450 {
2451         u64 super_bytenr;
2452         int i;
2453
2454         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2455                 super_bytenr = btrfs_sb_offset(i);
2456                 if (super_bytenr >= physical &&
2457                     super_bytenr < physical + bytes)
2458                         return 1;
2459         }
2460
2461         return 0;
2462 }
2463
2464 static void remap_overlapping_chunks(struct mdrestore_struct *mdres)
2465 {
2466         struct fs_chunk *fs_chunk;
2467
2468         while (!list_empty(&mdres->overlapping_chunks)) {
2469                 fs_chunk = list_first_entry(&mdres->overlapping_chunks,
2470                                             struct fs_chunk, list);
2471                 list_del_init(&fs_chunk->list);
2472                 if (range_contains_super(fs_chunk->physical,
2473                                          fs_chunk->bytes)) {
2474                         warning(
2475 "remapping a chunk that had a super mirror inside of it, clearing space cache so we don't end up with corruption");
2476                         mdres->clear_space_cache = 1;
2477                 }
2478                 fs_chunk->physical = mdres->last_physical_offset;
2479                 tree_insert(&mdres->physical_tree, &fs_chunk->p, physical_cmp);
2480                 mdres->last_physical_offset += fs_chunk->bytes;
2481         }
2482 }
2483
2484 static int fixup_devices(struct btrfs_fs_info *fs_info,
2485                          struct mdrestore_struct *mdres, off_t dev_size)
2486 {
2487         struct btrfs_trans_handle *trans;
2488         struct btrfs_dev_item *dev_item;
2489         struct btrfs_path path;
2490         struct extent_buffer *leaf;
2491         struct btrfs_root *root = fs_info->chunk_root;
2492         struct btrfs_key key;
2493         u64 devid, cur_devid;
2494         int ret;
2495
2496         trans = btrfs_start_transaction(fs_info->tree_root, 1);
2497         if (IS_ERR(trans)) {
2498                 error("cannot starting transaction %ld", PTR_ERR(trans));
2499                 return PTR_ERR(trans);
2500         }
2501
2502         dev_item = &fs_info->super_copy->dev_item;
2503
2504         devid = btrfs_stack_device_id(dev_item);
2505
2506         btrfs_set_stack_device_total_bytes(dev_item, dev_size);
2507         btrfs_set_stack_device_bytes_used(dev_item, mdres->alloced_chunks);
2508
2509         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2510         key.type = BTRFS_DEV_ITEM_KEY;
2511         key.offset = 0;
2512
2513         btrfs_init_path(&path);
2514
2515 again:
2516         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
2517         if (ret < 0) {
2518                 error("search failed: %d", ret);
2519                 exit(1);
2520         }
2521
2522         while (1) {
2523                 leaf = path.nodes[0];
2524                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2525                         ret = btrfs_next_leaf(root, &path);
2526                         if (ret < 0) {
2527                                 error("cannot go to next leaf %d", ret);
2528                                 exit(1);
2529                         }
2530                         if (ret > 0) {
2531                                 ret = 0;
2532                                 break;
2533                         }
2534                         leaf = path.nodes[0];
2535                 }
2536
2537                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2538                 if (key.type > BTRFS_DEV_ITEM_KEY)
2539                         break;
2540                 if (key.type != BTRFS_DEV_ITEM_KEY) {
2541                         path.slots[0]++;
2542                         continue;
2543                 }
2544
2545                 dev_item = btrfs_item_ptr(leaf, path.slots[0],
2546                                           struct btrfs_dev_item);
2547                 cur_devid = btrfs_device_id(leaf, dev_item);
2548                 if (devid != cur_devid) {
2549                         ret = btrfs_del_item(trans, root, &path);
2550                         if (ret) {
2551                                 error("cannot delete item: %d", ret);
2552                                 exit(1);
2553                         }
2554                         btrfs_release_path(&path);
2555                         goto again;
2556                 }
2557
2558                 btrfs_set_device_total_bytes(leaf, dev_item, dev_size);
2559                 btrfs_set_device_bytes_used(leaf, dev_item,
2560                                             mdres->alloced_chunks);
2561                 btrfs_mark_buffer_dirty(leaf);
2562                 path.slots[0]++;
2563         }
2564
2565         btrfs_release_path(&path);
2566         ret = btrfs_commit_transaction(trans, fs_info->tree_root);
2567         if (ret) {
2568                 error("unable to commit transaction: %d", ret);
2569                 return ret;
2570         }
2571         return 0;
2572 }
2573
2574 static int restore_metadump(const char *input, FILE *out, int old_restore,
2575                             int num_threads, int fixup_offset,
2576                             const char *target, int multi_devices)
2577 {
2578         struct meta_cluster *cluster = NULL;
2579         struct meta_cluster_header *header;
2580         struct mdrestore_struct mdrestore;
2581         struct btrfs_fs_info *info = NULL;
2582         u64 bytenr = 0;
2583         FILE *in = NULL;
2584         int ret = 0;
2585
2586         if (!strcmp(input, "-")) {
2587                 in = stdin;
2588         } else {
2589                 in = fopen(input, "r");
2590                 if (!in) {
2591                         error("unable to open metadump image: %s",
2592                                         strerror(errno));
2593                         return 1;
2594                 }
2595         }
2596
2597         /* NOTE: open with write mode */
2598         if (fixup_offset) {
2599                 info = open_ctree_fs_info(target, 0, 0, 0,
2600                                           OPEN_CTREE_WRITES |
2601                                           OPEN_CTREE_RESTORE |
2602                                           OPEN_CTREE_PARTIAL);
2603                 if (!info) {
2604                         error("open ctree failed");
2605                         ret = -EIO;
2606                         goto failed_open;
2607                 }
2608         }
2609
2610         cluster = malloc(BLOCK_SIZE);
2611         if (!cluster) {
2612                 error("not enough memory for cluster");
2613                 ret = -ENOMEM;
2614                 goto failed_info;
2615         }
2616
2617         ret = mdrestore_init(&mdrestore, in, out, old_restore, num_threads,
2618                              fixup_offset, info, multi_devices);
2619         if (ret) {
2620                 error("failed to initialize metadata restore state: %d", ret);
2621                 goto failed_cluster;
2622         }
2623
2624         if (!multi_devices && !old_restore) {
2625                 ret = build_chunk_tree(&mdrestore, cluster);
2626                 if (ret)
2627                         goto out;
2628                 if (!list_empty(&mdrestore.overlapping_chunks))
2629                         remap_overlapping_chunks(&mdrestore);
2630         }
2631
2632         if (in != stdin && fseek(in, 0, SEEK_SET)) {
2633                 error("seek failed: %s", strerror(errno));
2634                 goto out;
2635         }
2636
2637         while (!mdrestore.error) {
2638                 ret = fread(cluster, BLOCK_SIZE, 1, in);
2639                 if (!ret)
2640                         break;
2641
2642                 header = &cluster->header;
2643                 if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
2644                     le64_to_cpu(header->bytenr) != bytenr) {
2645                         error("bad header in metadump image");
2646                         ret = -EIO;
2647                         break;
2648                 }
2649                 ret = add_cluster(cluster, &mdrestore, &bytenr);
2650                 if (ret) {
2651                         error("failed to add cluster: %d", ret);
2652                         break;
2653                 }
2654         }
2655         ret = wait_for_worker(&mdrestore);
2656
2657         if (!ret && !multi_devices && !old_restore) {
2658                 struct btrfs_root *root;
2659                 struct stat st;
2660
2661                 root = open_ctree_fd(fileno(out), target, 0,
2662                                           OPEN_CTREE_PARTIAL |
2663                                           OPEN_CTREE_WRITES |
2664                                           OPEN_CTREE_NO_DEVICES);
2665                 if (!root) {
2666                         error("open ctree failed in %s", target);
2667                         ret = -EIO;
2668                         goto out;
2669                 }
2670                 info = root->fs_info;
2671
2672                 if (stat(target, &st)) {
2673                         error("stat %s failed: %s", target, strerror(errno));
2674                         close_ctree(info->chunk_root);
2675                         free(cluster);
2676                         return 1;
2677                 }
2678
2679                 ret = fixup_devices(info, &mdrestore, st.st_size);
2680                 close_ctree(info->chunk_root);
2681                 if (ret)
2682                         goto out;
2683         }
2684 out:
2685         mdrestore_destroy(&mdrestore, num_threads);
2686 failed_cluster:
2687         free(cluster);
2688 failed_info:
2689         if (fixup_offset && info)
2690                 close_ctree(info->chunk_root);
2691 failed_open:
2692         if (in != stdin)
2693                 fclose(in);
2694         return ret;
2695 }
2696
2697 static int update_disk_super_on_device(struct btrfs_fs_info *info,
2698                                        const char *other_dev, u64 cur_devid)
2699 {
2700         struct btrfs_key key;
2701         struct extent_buffer *leaf;
2702         struct btrfs_path path;
2703         struct btrfs_dev_item *dev_item;
2704         struct btrfs_super_block *disk_super;
2705         char dev_uuid[BTRFS_UUID_SIZE];
2706         char fs_uuid[BTRFS_UUID_SIZE];
2707         u64 devid, type, io_align, io_width;
2708         u64 sector_size, total_bytes, bytes_used;
2709         char buf[BTRFS_SUPER_INFO_SIZE];
2710         int fp = -1;
2711         int ret;
2712
2713         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2714         key.type = BTRFS_DEV_ITEM_KEY;
2715         key.offset = cur_devid;
2716
2717         btrfs_init_path(&path);
2718         ret = btrfs_search_slot(NULL, info->chunk_root, &key, &path, 0, 0); 
2719         if (ret) {
2720                 error("search key failed: %d", ret);
2721                 ret = -EIO;
2722                 goto out;
2723         }
2724
2725         leaf = path.nodes[0];
2726         dev_item = btrfs_item_ptr(leaf, path.slots[0],
2727                                   struct btrfs_dev_item);
2728
2729         devid = btrfs_device_id(leaf, dev_item);
2730         if (devid != cur_devid) {
2731                 error("devid mismatch: %llu != %llu",
2732                                 (unsigned long long)devid,
2733                                 (unsigned long long)cur_devid);
2734                 ret = -EIO;
2735                 goto out;
2736         }
2737
2738         type = btrfs_device_type(leaf, dev_item);
2739         io_align = btrfs_device_io_align(leaf, dev_item);
2740         io_width = btrfs_device_io_width(leaf, dev_item);
2741         sector_size = btrfs_device_sector_size(leaf, dev_item);
2742         total_bytes = btrfs_device_total_bytes(leaf, dev_item);
2743         bytes_used = btrfs_device_bytes_used(leaf, dev_item);
2744         read_extent_buffer(leaf, dev_uuid, (unsigned long)btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE);
2745         read_extent_buffer(leaf, fs_uuid, (unsigned long)btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE);
2746
2747         btrfs_release_path(&path);
2748
2749         printf("update disk super on %s devid=%llu\n", other_dev, devid);
2750
2751         /* update other devices' super block */
2752         fp = open(other_dev, O_CREAT | O_RDWR, 0600);
2753         if (fp < 0) {
2754                 error("could not open %s: %s", other_dev, strerror(errno));
2755                 ret = -EIO;
2756                 goto out;
2757         }
2758
2759         memcpy(buf, info->super_copy, BTRFS_SUPER_INFO_SIZE);
2760
2761         disk_super = (struct btrfs_super_block *)buf;
2762         dev_item = &disk_super->dev_item;
2763
2764         btrfs_set_stack_device_type(dev_item, type);
2765         btrfs_set_stack_device_id(dev_item, devid);
2766         btrfs_set_stack_device_total_bytes(dev_item, total_bytes);
2767         btrfs_set_stack_device_bytes_used(dev_item, bytes_used);
2768         btrfs_set_stack_device_io_align(dev_item, io_align);
2769         btrfs_set_stack_device_io_width(dev_item, io_width);
2770         btrfs_set_stack_device_sector_size(dev_item, sector_size);
2771         memcpy(dev_item->uuid, dev_uuid, BTRFS_UUID_SIZE);
2772         memcpy(dev_item->fsid, fs_uuid, BTRFS_UUID_SIZE);
2773         csum_block((u8 *)buf, BTRFS_SUPER_INFO_SIZE);
2774
2775         ret = pwrite64(fp, buf, BTRFS_SUPER_INFO_SIZE, BTRFS_SUPER_INFO_OFFSET);
2776         if (ret != BTRFS_SUPER_INFO_SIZE) {
2777                 if (ret < 0)
2778                         error("cannot write superblock: %s", strerror(ret));
2779                 else
2780                         error("cannot write superblock");
2781                 ret = -EIO;
2782                 goto out;
2783         }
2784
2785         write_backup_supers(fp, (u8 *)buf);
2786
2787 out:
2788         if (fp != -1)
2789                 close(fp);
2790         return ret;
2791 }
2792
/*
 * Print the command-line usage text to stdout and exit with @ret.
 * The output bytes are identical to the previous printf sequence.
 */
static void print_usage(int ret)
{
	static const char * const usage_lines[] = {
		"usage: btrfs-image [options] source target\n",
		"\t-r      \trestore metadump image\n",
		"\t-c value\tcompression level (0 ~ 9)\n",
		"\t-t value\tnumber of threads (1 ~ 32)\n",
		"\t-o      \tdon't mess with the chunk tree when restoring\n",
		"\t-s      \tsanitize file names, use once to just use garbage, use twice if you want crc collisions\n",
		"\t-w      \twalk all trees instead of using extent tree, do this if your extent tree is broken\n",
		"\t-m       \trestore for multiple devices\n",
		"\n",
		"\tIn the dump mode, source is the btrfs device and target is the output file (use '-' for stdout).\n",
		"\tIn the restore mode, source is the dumped image and target is the btrfs device/file.\n",
	};
	size_t i;

	for (i = 0; i < sizeof(usage_lines) / sizeof(usage_lines[0]); i++)
		printf("%s", usage_lines[i]);
	exit(ret);
}
2808
2809 int main(int argc, char *argv[])
2810 {
2811         char *source;
2812         char *target;
2813         u64 num_threads = 0;
2814         u64 compress_level = 0;
2815         int create = 1;
2816         int old_restore = 0;
2817         int walk_trees = 0;
2818         int multi_devices = 0;
2819         int ret;
2820         enum sanitize_mode sanitize = SANITIZE_NONE;
2821         int dev_cnt = 0;
2822         int usage_error = 0;
2823         FILE *out;
2824
2825         while (1) {
2826                 static const struct option long_options[] = {
2827                         { "help", no_argument, NULL, GETOPT_VAL_HELP},
2828                         { NULL, 0, NULL, 0 }
2829                 };
2830                 int c = getopt_long(argc, argv, "rc:t:oswm", long_options, NULL);
2831                 if (c < 0)
2832                         break;
2833                 switch (c) {
2834                 case 'r':
2835                         create = 0;
2836                         break;
2837                 case 't':
2838                         num_threads = arg_strtou64(optarg);
2839                         if (num_threads > MAX_WORKER_THREADS) {
2840                                 error("number of threads out of range: %llu > %d",
2841                                         (unsigned long long)num_threads,
2842                                         MAX_WORKER_THREADS);
2843                                 return 1;
2844                         }
2845                         break;
2846                 case 'c':
2847                         compress_level = arg_strtou64(optarg);
2848                         if (compress_level > 9) {
2849                                 error("compression level out of range: %llu",
2850                                         (unsigned long long)compress_level);
2851                                 return 1;
2852                         }
2853                         break;
2854                 case 'o':
2855                         old_restore = 1;
2856                         break;
2857                 case 's':
2858                         if (sanitize == SANITIZE_NONE)
2859                                 sanitize = SANITIZE_NAMES;
2860                         else if (sanitize == SANITIZE_NAMES)
2861                                 sanitize = SANITIZE_COLLISIONS;
2862                         break;
2863                 case 'w':
2864                         walk_trees = 1;
2865                         break;
2866                 case 'm':
2867                         create = 0;
2868                         multi_devices = 1;
2869                         break;
2870                         case GETOPT_VAL_HELP:
2871                 default:
2872                         print_usage(c != GETOPT_VAL_HELP);
2873                 }
2874         }
2875
2876         set_argv0(argv);
2877         if (check_argc_min(argc - optind, 2))
2878                 print_usage(1);
2879
2880         dev_cnt = argc - optind - 1;
2881
2882         if (create) {
2883                 if (old_restore) {
2884                         error(
2885                         "create and restore cannot be used at the same time");
2886                         usage_error++;
2887                 }
2888         } else {
2889                 if (walk_trees || sanitize != SANITIZE_NONE || compress_level) {
2890                         error(
2891                         "useing -w, -s, -c options for restore makes no sense");
2892                         usage_error++;
2893                 }
2894                 if (multi_devices && dev_cnt < 2) {
2895                         error("not enough devices specified for -m option");
2896                         usage_error++;
2897                 }
2898                 if (!multi_devices && dev_cnt != 1) {
2899                         error("accepts only 1 device without -m option");
2900                         usage_error++;
2901                 }
2902         }
2903
2904         if (usage_error)
2905                 print_usage(1);
2906
2907         source = argv[optind];
2908         target = argv[optind + 1];
2909
2910         if (create && !strcmp(target, "-")) {
2911                 out = stdout;
2912         } else {
2913                 out = fopen(target, "w+");
2914                 if (!out) {
2915                         error("unable to create target file %s", target);
2916                         exit(1);
2917                 }
2918         }
2919
2920         if (compress_level > 0 || create == 0) {
2921                 if (num_threads == 0) {
2922                         long tmp = sysconf(_SC_NPROCESSORS_ONLN);
2923
2924                         if (tmp <= 0)
2925                                 tmp = 1;
2926                         num_threads = tmp;
2927                 }
2928         } else {
2929                 num_threads = 0;
2930         }
2931
2932         if (create) {
2933                 ret = check_mounted(source);
2934                 if (ret < 0) {
2935                         warning("unable to check mount status of: %s",
2936                                         strerror(-ret));
2937                 } else if (ret) {
2938                         warning("%s already mounted, results may be inaccurate",
2939                                         source);
2940                 }
2941
2942                 ret = create_metadump(source, out, num_threads,
2943                                       compress_level, sanitize, walk_trees);
2944         } else {
2945                 ret = restore_metadump(source, out, old_restore, num_threads,
2946                                        0, target, multi_devices);
2947         }
2948         if (ret) {
2949                 error("%s failed: %s", (create) ? "create" : "restore",
2950                        strerror(errno));
2951                 goto out;
2952         }
2953
2954          /* extended support for multiple devices */
2955         if (!create && multi_devices) {
2956                 struct btrfs_fs_info *info;
2957                 u64 total_devs;
2958                 int i;
2959
2960                 info = open_ctree_fs_info(target, 0, 0, 0,
2961                                           OPEN_CTREE_PARTIAL |
2962                                           OPEN_CTREE_RESTORE);
2963                 if (!info) {
2964                         error("open ctree failed at %s", target);
2965                         return 1;
2966                 }
2967
2968                 total_devs = btrfs_super_num_devices(info->super_copy);
2969                 if (total_devs != dev_cnt) {
2970                         error("it needs %llu devices but has only %d",
2971                                 total_devs, dev_cnt);
2972                         close_ctree(info->chunk_root);
2973                         goto out;
2974                 }
2975
2976                 /* update super block on other disks */
2977                 for (i = 2; i <= dev_cnt; i++) {
2978                         ret = update_disk_super_on_device(info,
2979                                         argv[optind + i], (u64)i);
2980                         if (ret) {
2981                                 error("update disk superblock failed devid %d: %d",
2982                                         i, ret);
2983                                 close_ctree(info->chunk_root);
2984                                 exit(1);
2985                         }
2986                 }
2987
2988                 close_ctree(info->chunk_root);
2989
2990                 /* fix metadata block to map correct chunk */
2991                 ret = restore_metadump(source, out, 0, num_threads, 1,
2992                                        target, 1);
2993                 if (ret) {
2994                         error("unable to fixup metadump: %d", ret);
2995                         exit(1);
2996                 }
2997         }
2998 out:
2999         if (out == stdout) {
3000                 fflush(out);
3001         } else {
3002                 fclose(out);
3003                 if (ret && create) {
3004                         int unlink_ret;
3005
3006                         unlink_ret = unlink(target);
3007                         if (unlink_ret)
3008                                 error("unlink output file %s failed: %s",
3009                                                 target, strerror(errno));
3010                 }
3011         }
3012
3013         btrfs_close_all_devices();
3014
3015         return !!ret;
3016 }