Btrfs-image: add the ability to santize file names when making an image
[platform/upstream/btrfs-progs.git] / btrfs-image.c
1 /*
2  * Copyright (C) 2008 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #define _XOPEN_SOURCE 500
20 #define _GNU_SOURCE 1
21 #include <pthread.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <sys/types.h>
25 #include <sys/stat.h>
26 #include <fcntl.h>
27 #include <unistd.h>
28 #include <dirent.h>
29 #include <zlib.h>
30 #include "kerncompat.h"
31 #include "crc32c.h"
32 #include "ctree.h"
33 #include "disk-io.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "version.h"
37 #include "volumes.h"
38
39 #define HEADER_MAGIC            0xbd5c25e27295668bULL
40 #define MAX_PENDING_SIZE        (256 * 1024)
41 #define BLOCK_SIZE              1024
42 #define BLOCK_MASK              (BLOCK_SIZE - 1)
43
44 #define COMPRESS_NONE           0
45 #define COMPRESS_ZLIB           1
46
/* Index entry in a cluster header: one (bytenr, size) pair per data chunk. */
struct meta_cluster_item {
	__le64 bytenr;	/* logical address the chunk was read from */
	__le32 size;	/* on-image (possibly compressed) byte count */
} __attribute__ ((__packed__));
51
/* On-image header preceding each cluster of dumped blocks. */
struct meta_cluster_header {
	__le64 magic;	/* HEADER_MAGIC */
	__le64 bytenr;	/* image offset this cluster starts at */
	__le32 nritems;	/* number of items[] entries in use */
	u8 compress;	/* COMPRESS_NONE or COMPRESS_ZLIB */
} __attribute__ ((__packed__));
58
/* cluster header + index items + buffers */
struct meta_cluster {
	struct meta_cluster_header header;
	struct meta_cluster_item items[];	/* fills the rest of BLOCK_SIZE */
} __attribute__ ((__packed__));

/* How many index items fit in one BLOCK_SIZE cluster block. */
#define ITEMS_PER_CLUSTER ((BLOCK_SIZE - sizeof(struct meta_cluster)) / \
			   sizeof(struct meta_cluster_item))
67
/*
 * One contiguous run of blocks queued for (optional) compression and
 * writing; linked on metadump_struct::list and ::ordered.
 */
struct async_work {
	struct list_head list;		/* compression work-queue linkage */
	struct list_head ordered;	/* on-disk output-order linkage */
	u64 start;			/* logical start of the run */
	u64 size;			/* uncompressed byte count */
	u8 *buffer;			/* data; replaced by compressed copy by worker */
	size_t bufsize;			/* current bytes held in buffer */
	int error;			/* set by worker on compression failure */
};
77
/* State for creating an image from a filesystem. */
struct metadump_struct {
	struct btrfs_root *root;	/* filesystem being imaged */
	FILE *out;			/* image output stream */

	struct meta_cluster *cluster;	/* current BLOCK_SIZE index block */

	pthread_t *threads;		/* compression worker pool */
	size_t num_threads;
	pthread_mutex_t mutex;		/* guards list/ordered and counters */
	pthread_cond_t cond;
	struct rb_root name_tree;	/* original -> sanitized name mappings */

	struct list_head list;		/* buffers waiting for compression */
	struct list_head ordered;	/* buffers in on-disk output order */
	size_t num_items;		/* buffers queued in current cluster */
	size_t num_ready;		/* buffers finished by workers */

	u64 pending_start;		/* logical start of run being coalesced */
	u64 pending_size;

	int compress_level;		/* zlib level; 0 disables compression */
	int done;			/* tells worker threads to exit */
	int data;			/* pending run is file data, not metadata */
	int sanitize_names;		/* 1 = random garbage, >1 = crc32c collisions */
};
103
/*
 * One entry in metadump_struct::name_tree mapping an original file name to
 * its sanitized replacement of the same length.
 */
struct name {
	struct rb_node n;
	char *val;	/* original name (owned, not NUL-terminated) */
	char *sub;	/* sanitized substitute, same length (owned) */
	u32 len;	/* length of both buffers */
};
110
/*
 * State for restoring an image back onto a device.  The restore code that
 * uses most of these fields lies outside this chunk; comments stick to what
 * the names and types show.
 */
struct mdrestore_struct {
	FILE *in;			/* image input stream */
	FILE *out;			/* target device/file stream */

	pthread_t *threads;		/* decompression worker pool */
	size_t num_threads;
	pthread_mutex_t mutex;
	pthread_cond_t cond;

	struct list_head list;		/* queued clusters/buffers */
	size_t num_items;
	u64 leafsize;
	u64 devid;
	u8 uuid[BTRFS_UUID_SIZE];
	u8 fsid[BTRFS_FSID_SIZE];

	int compress_method;		/* COMPRESS_* from the image header */
	int done;
	int error;
	int old_restore;		/* restore images made by old btrfs-image */
};
132
133 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size);
134
135 static void csum_block(u8 *buf, size_t len)
136 {
137         char result[BTRFS_CRC32_SIZE];
138         u32 crc = ~(u32)0;
139         crc = crc32c(crc, buf + BTRFS_CSUM_SIZE, len - BTRFS_CSUM_SIZE);
140         btrfs_csum_final(crc, result);
141         memcpy(buf, result, BTRFS_CRC32_SIZE);
142 }
143
144 static int has_name(struct btrfs_key *key)
145 {
146         switch (key->type) {
147         case BTRFS_DIR_ITEM_KEY:
148         case BTRFS_DIR_INDEX_KEY:
149         case BTRFS_INODE_REF_KEY:
150         case BTRFS_INODE_EXTREF_KEY:
151                 return 1;
152         default:
153                 break;
154         }
155
156         return 0;
157 }
158
159 static char *generate_garbage(u32 name_len)
160 {
161         char *buf = malloc(name_len);
162         int i;
163
164         if (!buf)
165                 return NULL;
166
167         for (i = 0; i < name_len; i++) {
168                 char c = rand() % 94 + 33;
169
170                 if (c == '/')
171                         c++;
172                 buf[i] = c;
173         }
174
175         return buf;
176 }
177
178 static void tree_insert(struct rb_root *root, struct name *ins)
179 {
180         struct rb_node ** p = &root->rb_node;
181         struct rb_node * parent = NULL;
182         struct name *entry;
183         u32 len;
184         int dir;
185
186         while(*p) {
187                 parent = *p;
188                 entry = rb_entry(parent, struct name, n);
189
190                 len = min(ins->len, entry->len);
191                 dir = memcmp(ins->val, entry->val, len);
192
193                 if (dir < 0)
194                         p = &(*p)->rb_left;
195                 else if (dir > 0)
196                         p = &(*p)->rb_right;
197                 else
198                         BUG();
199         }
200
201         rb_link_node(&ins->n, parent, p);
202         rb_insert_color(&ins->n, root);
203 }
204
205 static struct name *name_search(struct rb_root *root, char *name, u32 name_len)
206 {
207         struct rb_node *n = root->rb_node;
208         struct name *entry = NULL;
209         u32 len;
210         int dir;
211
212         while (n) {
213                 entry = rb_entry(n, struct name, n);
214
215                 len = min(entry->len, name_len);
216
217                 dir = memcmp(name, entry->val, len);
218                 if (dir < 0)
219                         n = n->rb_left;
220                 else if (dir > 0)
221                         n = n->rb_right;
222                 else
223                         return entry;
224         }
225
226         return NULL;
227 }
228
229 static char *find_collision(struct metadump_struct *md, char *name,
230                             u32 name_len)
231 {
232         struct name *val;
233         unsigned long checksum;
234         int found = 0;
235         int i;
236
237         val = name_search(&md->name_tree, name, name_len);
238         if (val) {
239                 free(name);
240                 return val->sub;
241         }
242
243         val = malloc(sizeof(struct name));
244         if (!val) {
245                 fprintf(stderr, "Couldn't sanitize name, enomem\n");
246                 return NULL;
247         }
248
249         memset(val, 0, sizeof(*val));
250
251         val->val = name;
252         val->len = name_len;
253         val->sub = malloc(name_len);
254         if (!val->sub) {
255                 fprintf(stderr, "Couldn't sanitize name, enomem\n");
256                 free(val);
257                 return NULL;
258         }
259
260         checksum = crc32c(~1, val->val, name_len);
261         memset(val->sub, ' ', name_len);
262         i = 0;
263         while (1) {
264                 if (crc32c(~1, val->sub, name_len) == checksum &&
265                     memcmp(val->sub, val->val, val->len)) {
266                         found = 1;
267                         break;
268                 }
269
270                 if (val->sub[i] == 127) {
271                         do {
272                                 i++;
273                                 if (i > name_len)
274                                         break;
275                         } while (val->sub[i] == 127);
276
277                         if (i > name_len)
278                                 break;
279                         val->sub[i]++;
280                         if (val->sub[i] == '/')
281                                 val->sub[i]++;
282                         memset(val->sub, ' ', i);
283                         i = 0;
284                         continue;
285                 } else {
286                         val->sub[i]++;
287                         if (val->sub[i] == '/')
288                                 val->sub[i]++;
289                 }
290         }
291
292         if (!found) {
293                 fprintf(stderr, "Couldn't find a collision for '%.*s', "
294                         "generating normal garbage, it won't match indexes\n",
295                         val->len, val->val);
296                 for (i = 0; i < name_len; i++) {
297                         char c = rand() % 94 + 33;
298
299                         if (c == '/')
300                                 c++;
301                         val->sub[i] = c;
302                 }
303         }
304
305         tree_insert(&md->name_tree, val);
306         return val->sub;
307 }
308
/*
 * Overwrite every file name stored in the dir item(s) of @slot with garbage
 * (-s) or a crc32c-colliding substitute (-ss).  A leaf dir item can hold
 * several entries back to back; walk them by their variable lengths.
 */
static void sanitize_dir_item(struct metadump_struct *md, struct extent_buffer *eb,
			      int slot)
{
	struct btrfs_dir_item *dir_item;
	char *buf;
	char *garbage;
	unsigned long name_ptr;
	u32 total_len;
	u32 cur = 0;
	u32 this_len;
	u32 name_len;
	/* collision substitutes (sanitize_names > 1) are owned by name_tree */
	int free_garbage = (md->sanitize_names == 1);

	dir_item = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
	total_len = btrfs_item_size_nr(eb, slot);
	while (cur < total_len) {
		/* header + name + data gives the stride to the next entry */
		this_len = sizeof(*dir_item) +
			btrfs_dir_name_len(eb, dir_item) +
			btrfs_dir_data_len(eb, dir_item);
		name_ptr = (unsigned long)(dir_item + 1);
		name_len = btrfs_dir_name_len(eb, dir_item);

		if (md->sanitize_names > 1) {
			/* find_collision takes ownership of buf */
			buf = malloc(name_len);
			if (!buf) {
				fprintf(stderr, "Couldn't sanitize name, "
					"enomem\n");
				return;
			}
			read_extent_buffer(eb, buf, name_ptr, name_len);
			garbage = find_collision(md, buf, name_len);
		} else {
			garbage = generate_garbage(name_len);
		}
		if (!garbage) {
			fprintf(stderr, "Couldn't sanitize name, enomem\n");
			return;
		}
		write_extent_buffer(eb, garbage, name_ptr, name_len);
		cur += this_len;
		dir_item = (struct btrfs_dir_item *)((char *)dir_item +
						     this_len);
		if (free_garbage)
			free(garbage);
	}
}
355
/*
 * Overwrite the file names embedded in the inode ref (@ext == 0) or extref
 * (@ext == 1) item at @slot, the same way sanitize_dir_item() does for dir
 * items.  An item can hold multiple refs back to back.
 */
static void sanitize_inode_ref(struct metadump_struct *md,
			       struct extent_buffer *eb, int slot, int ext)
{
	struct btrfs_inode_extref *extref;
	struct btrfs_inode_ref *ref;
	char *garbage, *buf;
	unsigned long ptr;
	unsigned long name_ptr;
	u32 item_size;
	u32 cur_offset = 0;
	int len;
	/* collision substitutes (sanitize_names > 1) are owned by name_tree */
	int free_garbage = (md->sanitize_names == 1);

	item_size = btrfs_item_size_nr(eb, slot);
	ptr = btrfs_item_ptr_offset(eb, slot);
	while (cur_offset < item_size) {
		if (ext) {
			extref = (struct btrfs_inode_extref *)(ptr +
							       cur_offset);
			name_ptr = (unsigned long)(&extref->name);
			len = btrfs_inode_extref_name_len(eb, extref);
			cur_offset += sizeof(*extref);
		} else {
			ref = (struct btrfs_inode_ref *)(ptr + cur_offset);
			len = btrfs_inode_ref_name_len(eb, ref);
			name_ptr = (unsigned long)(ref + 1);
			cur_offset += sizeof(*ref);
		}
		/* the name trails the fixed-size ref structure */
		cur_offset += len;

		if (md->sanitize_names > 1) {
			/* find_collision takes ownership of buf */
			buf = malloc(len);
			if (!buf) {
				fprintf(stderr, "Couldn't sanitize name, "
					"enomem\n");
				return;
			}
			read_extent_buffer(eb, buf, name_ptr, len);
			garbage = find_collision(md, buf, len);
		} else {
			garbage = generate_garbage(len);
		}

		if (!garbage) {
			fprintf(stderr, "Couldn't sanitize name, enomem\n");
			return;
		}
		write_extent_buffer(eb, garbage, name_ptr, len);
		if (free_garbage)
			free(garbage);
	}
}
408
409 static void sanitize_name(struct metadump_struct *md, u8 *dst,
410                           struct extent_buffer *src, struct btrfs_key *key,
411                           int slot)
412 {
413         struct extent_buffer *eb;
414
415         eb = alloc_dummy_eb(src->start, src->len);
416         if (!eb) {
417                 fprintf(stderr, "Couldn't sanitize name, no memory\n");
418                 return;
419         }
420
421         memcpy(eb->data, dst, eb->len);
422
423         switch (key->type) {
424         case BTRFS_DIR_ITEM_KEY:
425         case BTRFS_DIR_INDEX_KEY:
426                 sanitize_dir_item(md, eb, slot);
427                 break;
428         case BTRFS_INODE_REF_KEY:
429                 sanitize_inode_ref(md, eb, slot, 0);
430                 break;
431         case BTRFS_INODE_EXTREF_KEY:
432                 sanitize_inode_ref(md, eb, slot, 1);
433                 break;
434         default:
435                 break;
436         }
437
438         memcpy(dst, eb->data, eb->len);
439         free(eb);
440 }
441
/*
 * zero inline extents and csum items
 */
static void zero_items(struct metadump_struct *md, u8 *dst,
		       struct extent_buffer *src)
{
	struct btrfs_file_extent_item *fi;
	struct btrfs_item *item;
	struct btrfs_key key;
	u32 nritems = btrfs_header_nritems(src);
	size_t size;
	unsigned long ptr;
	int i, extent_type;

	for (i = 0; i < nritems; i++) {
		item = btrfs_item_nr(src, i);
		btrfs_item_key_to_cpu(src, &key, i);
		/* checksum items: wipe the csum payload */
		if (key.type == BTRFS_CSUM_ITEM_KEY) {
			size = btrfs_item_size_nr(src, i);
			memset(dst + btrfs_leaf_data(src) +
			       btrfs_item_offset_nr(src, i), 0, size);
			continue;
		}

		/* optionally scramble file names (-s / -ss) */
		if (md->sanitize_names && has_name(&key)) {
			sanitize_name(md, dst, src, &key, i);
			continue;
		}

		if (key.type != BTRFS_EXTENT_DATA_KEY)
			continue;

		/* inline file extents embed file data in the leaf: zero it */
		fi = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(src, fi);
		if (extent_type != BTRFS_FILE_EXTENT_INLINE)
			continue;

		ptr = btrfs_file_extent_inline_start(fi);
		size = btrfs_file_extent_inline_item_len(src, item);
		memset(dst + ptr, 0, size);
	}
}
484
/*
 * copy buffer and zero useless data in the buffer
 */
static void copy_buffer(struct metadump_struct *md, u8 *dst,
			struct extent_buffer *src)
{
	int level;
	size_t size;
	u32 nritems;

	memcpy(dst, src->data, src->len);
	/* the superblock is copied verbatim */
	if (src->start == BTRFS_SUPER_INFO_OFFSET)
		return;

	level = btrfs_header_level(src);
	nritems = btrfs_header_nritems(src);

	if (nritems == 0) {
		/* empty block: keep only the header */
		size = sizeof(struct btrfs_header);
		memset(dst + size, 0, src->len - size);
	} else if (level == 0) {
		/*
		 * leaf: zero the unused gap between the item array and the
		 * item data, then scrub csums / inline data / names
		 */
		size = btrfs_leaf_data(src) +
			btrfs_item_offset_nr(src, nritems - 1) -
			btrfs_item_nr_offset(nritems);
		memset(dst + btrfs_item_nr_offset(nritems), 0, size);
		zero_items(md, dst, src);
	} else {
		/* node: keep header + key pointers, zero the tail */
		size = offsetof(struct btrfs_node, ptrs) +
			sizeof(struct btrfs_key_ptr) * nritems;
		memset(dst + size, 0, src->len - size);
	}
	/* the copy was modified above, so recompute its checksum */
	csum_block(dst, src->len);
}
518
/*
 * Worker thread: pull queued buffers off md->list and, when compression is
 * enabled, replace each buffer with its zlib-compressed form.  Exits once
 * md->done is set and the queue is drained.
 */
static void *dump_worker(void *data)
{
	struct metadump_struct *md = (struct metadump_struct *)data;
	struct async_work *async;
	int ret;

	while (1) {
		pthread_mutex_lock(&md->mutex);
		while (list_empty(&md->list)) {
			if (md->done) {
				pthread_mutex_unlock(&md->mutex);
				goto out;
			}
			pthread_cond_wait(&md->cond, &md->mutex);
		}
		async = list_entry(md->list.next, struct async_work, list);
		list_del_init(&async->list);
		pthread_mutex_unlock(&md->mutex);

		if (md->compress_level > 0) {
			u8 *orig = async->buffer;

			async->bufsize = compressBound(async->size);
			async->buffer = malloc(async->bufsize);
			if (!async->buffer) {
				/*
				 * Allocation failed: flag the error and keep
				 * the raw buffer so write_buffers() still has
				 * something consistent to free.
				 */
				async->error = 1;
				async->bufsize = async->size;
				async->buffer = orig;
			} else {
				ret = compress2(async->buffer,
						(unsigned long *)&async->bufsize,
						orig, async->size,
						md->compress_level);
				if (ret != Z_OK)
					async->error = 1;
				free(orig);
			}
		}

		pthread_mutex_lock(&md->mutex);
		md->num_ready++;
		pthread_mutex_unlock(&md->mutex);
	}
out:
	pthread_exit(NULL);
}
561
562 static void meta_cluster_init(struct metadump_struct *md, u64 start)
563 {
564         struct meta_cluster_header *header;
565
566         md->num_items = 0;
567         md->num_ready = 0;
568         header = &md->cluster->header;
569         header->magic = cpu_to_le64(HEADER_MAGIC);
570         header->bytenr = cpu_to_le64(start);
571         header->nritems = cpu_to_le32(0);
572         header->compress = md->compress_level > 0 ?
573                            COMPRESS_ZLIB : COMPRESS_NONE;
574 }
575
/*
 * Initialize @md for dumping @root to @out: set up the cluster block, the
 * worker pool and (for -ss) the crc32c tables.  Returns 0 or -errno; on
 * failure everything allocated here is torn down again.
 */
static int metadump_init(struct metadump_struct *md, struct btrfs_root *root,
			 FILE *out, int num_threads, int compress_level,
			 int sanitize_names)
{
	int i, ret = 0;

	memset(md, 0, sizeof(*md));
	pthread_cond_init(&md->cond, NULL);
	pthread_mutex_init(&md->mutex, NULL);
	INIT_LIST_HEAD(&md->list);
	INIT_LIST_HEAD(&md->ordered);
	md->root = root;
	md->out = out;
	md->pending_start = (u64)-1;
	md->compress_level = compress_level;
	md->cluster = calloc(1, BLOCK_SIZE);
	md->sanitize_names = sanitize_names;
	/* collision search (-ss) hammers crc32c; enable the fast paths */
	if (sanitize_names > 1)
		crc32c_optimization_init();

	if (!md->cluster) {
		pthread_cond_destroy(&md->cond);
		pthread_mutex_destroy(&md->mutex);
		return -ENOMEM;
	}

	meta_cluster_init(md, 0);
	/* no worker pool requested: compression happens inline (level 0) */
	if (!num_threads)
		return 0;

	md->name_tree.rb_node = NULL;
	md->num_threads = num_threads;
	md->threads = calloc(num_threads, sizeof(pthread_t));
	if (!md->threads) {
		free(md->cluster);
		pthread_cond_destroy(&md->cond);
		pthread_mutex_destroy(&md->mutex);
		return -ENOMEM;
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(md->threads + i, NULL, dump_worker, md);
		if (ret)
			break;
	}

	/* partial thread creation: wake and join the ones that started */
	if (ret) {
		pthread_mutex_lock(&md->mutex);
		md->done = 1;
		pthread_cond_broadcast(&md->cond);
		pthread_mutex_unlock(&md->mutex);

		for (i--; i >= 0; i--)
			pthread_join(md->threads[i], NULL);

		pthread_cond_destroy(&md->cond);
		pthread_mutex_destroy(&md->mutex);
		free(md->cluster);
		free(md->threads);
	}

	return ret;
}
639
/* Stop the worker pool and free everything metadump_init() allocated. */
static void metadump_destroy(struct metadump_struct *md)
{
	int i;
	struct rb_node *n;

	/* tell the workers to exit and wake them all */
	pthread_mutex_lock(&md->mutex);
	md->done = 1;
	pthread_cond_broadcast(&md->cond);
	pthread_mutex_unlock(&md->mutex);

	for (i = 0; i < md->num_threads; i++)
		pthread_join(md->threads[i], NULL);

	pthread_cond_destroy(&md->cond);
	pthread_mutex_destroy(&md->mutex);

	/* release every sanitized-name mapping accumulated in the tree */
	while ((n = rb_first(&md->name_tree))) {
		struct name *name;

		name = rb_entry(n, struct name, n);
		rb_erase(n, &md->name_tree);
		free(name->val);
		free(name->sub);
		free(name);
	}
	free(md->threads);
	free(md->cluster);
}
668
/*
 * Write @size zero bytes to @out as a single fwrite item; @size must not
 * exceed BLOCK_SIZE.  Returns fwrite's item count (1 on success).
 */
static int write_zero(FILE *out, size_t size)
{
	/* static storage is zero-initialized, so no memset is needed */
	static char zero[BLOCK_SIZE];

	return fwrite(zero, size, 1, out);
}
674
675 static int write_buffers(struct metadump_struct *md, u64 *next)
676 {
677         struct meta_cluster_header *header = &md->cluster->header;
678         struct meta_cluster_item *item;
679         struct async_work *async;
680         u64 bytenr = 0;
681         u32 nritems = 0;
682         int ret;
683         int err = 0;
684
685         if (list_empty(&md->ordered))
686                 goto out;
687
688         /* wait until all buffers are compressed */
689         while (md->num_items > md->num_ready) {
690                 struct timespec ts = {
691                         .tv_sec = 0,
692                         .tv_nsec = 10000000,
693                 };
694                 pthread_mutex_unlock(&md->mutex);
695                 nanosleep(&ts, NULL);
696                 pthread_mutex_lock(&md->mutex);
697         }
698
699         /* setup and write index block */
700         list_for_each_entry(async, &md->ordered, ordered) {
701                 item = md->cluster->items + nritems;
702                 item->bytenr = cpu_to_le64(async->start);
703                 item->size = cpu_to_le32(async->bufsize);
704                 nritems++;
705         }
706         header->nritems = cpu_to_le32(nritems);
707
708         ret = fwrite(md->cluster, BLOCK_SIZE, 1, md->out);
709         if (ret != 1) {
710                 fprintf(stderr, "Error writing out cluster: %d\n", errno);
711                 return -EIO;
712         }
713
714         /* write buffers */
715         bytenr += le64_to_cpu(header->bytenr) + BLOCK_SIZE;
716         while (!list_empty(&md->ordered)) {
717                 async = list_entry(md->ordered.next, struct async_work,
718                                    ordered);
719                 list_del_init(&async->ordered);
720
721                 bytenr += async->bufsize;
722                 if (!err)
723                         ret = fwrite(async->buffer, async->bufsize, 1,
724                                      md->out);
725                 if (ret != 1) {
726                         err = -EIO;
727                         ret = 0;
728                         fprintf(stderr, "Error writing out cluster: %d\n",
729                                 errno);
730                 }
731
732                 free(async->buffer);
733                 free(async);
734         }
735
736         /* zero unused space in the last block */
737         if (!err && bytenr & BLOCK_MASK) {
738                 size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);
739
740                 bytenr += size;
741                 ret = write_zero(md->out, size);
742                 if (ret != 1) {
743                         fprintf(stderr, "Error zeroing out buffer: %d\n",
744                                 errno);
745                         err = -EIO;
746                 }
747         }
748 out:
749         *next = bytenr;
750         return err;
751 }
752
753 static int read_data_extent(struct metadump_struct *md,
754                             struct async_work *async)
755 {
756         struct btrfs_multi_bio *multi = NULL;
757         struct btrfs_device *device;
758         u64 bytes_left = async->size;
759         u64 logical = async->start;
760         u64 offset = 0;
761         u64 bytenr;
762         u64 read_len;
763         ssize_t done;
764         int fd;
765         int ret;
766
767         while (bytes_left) {
768                 read_len = bytes_left;
769                 ret = btrfs_map_block(&md->root->fs_info->mapping_tree, READ,
770                                       logical, &read_len, &multi, 0, NULL);
771                 if (ret) {
772                         fprintf(stderr, "Couldn't map data block %d\n", ret);
773                         return ret;
774                 }
775
776                 device = multi->stripes[0].dev;
777
778                 if (device->fd == 0) {
779                         fprintf(stderr,
780                                 "Device we need to read from is not open\n");
781                         free(multi);
782                         return -EIO;
783                 }
784                 fd = device->fd;
785                 bytenr = multi->stripes[0].physical;
786                 free(multi);
787
788                 read_len = min(read_len, bytes_left);
789                 done = pread64(fd, async->buffer+offset, read_len, bytenr);
790                 if (done < read_len) {
791                         if (done < 0)
792                                 fprintf(stderr, "Error reading extent %d\n",
793                                         errno);
794                         else
795                                 fprintf(stderr, "Short read\n");
796                         return -EIO;
797                 }
798
799                 bytes_left -= done;
800                 offset += done;
801                 logical += done;
802         }
803
804         return 0;
805 }
806
/*
 * Turn the pending run [pending_start, pending_start + pending_size) into
 * an async_work entry: read its data (file data directly, metadata block by
 * block through copy_buffer), queue it for compression/output and, when the
 * cluster is full or @done is set, write the cluster out.  Returns 0 or a
 * negative error.
 */
static int flush_pending(struct metadump_struct *md, int done)
{
	struct async_work *async = NULL;
	struct extent_buffer *eb;
	u64 blocksize = md->root->nodesize;
	u64 start;
	u64 size;
	size_t offset;
	int ret = 0;

	if (md->pending_size) {
		async = calloc(1, sizeof(*async));
		if (!async)
			return -ENOMEM;

		async->start = md->pending_start;
		async->size = md->pending_size;
		async->bufsize = async->size;
		async->buffer = malloc(async->bufsize);
		if (!async->buffer) {
			free(async);
			return -ENOMEM;
		}
		offset = 0;
		start = async->start;
		size = async->size;

		/* file data: read the extent straight from the devices */
		if (md->data) {
			ret = read_data_extent(md, async);
			if (ret) {
				free(async->buffer);
				free(async);
				return ret;
			}
		}

		/* metadata: copy block by block, scrubbing as we go */
		while (!md->data && size > 0) {
			u64 this_read = min(blocksize, size);
			eb = read_tree_block(md->root, start, this_read, 0);
			if (!eb) {
				free(async->buffer);
				free(async);
				fprintf(stderr,
					"Error reading metadata block\n");
				return -EIO;
			}
			copy_buffer(md, async->buffer + offset, eb);
			free_extent_buffer(eb);
			start += this_read;
			offset += this_read;
			size -= this_read;
		}

		md->pending_start = (u64)-1;
		md->pending_size = 0;
	} else if (!done) {
		/* nothing pending and not finishing: nothing to do */
		return 0;
	}

	pthread_mutex_lock(&md->mutex);
	if (async) {
		list_add_tail(&async->ordered, &md->ordered);
		md->num_items++;
		if (md->compress_level > 0) {
			/* hand the buffer to a worker thread */
			list_add_tail(&async->list, &md->list);
			pthread_cond_signal(&md->cond);
		} else {
			md->num_ready++;
		}
	}
	if (md->num_items >= ITEMS_PER_CLUSTER || done) {
		/* write_buffers() sets start to the next cluster offset */
		ret = write_buffers(md, &start);
		if (ret)
			fprintf(stderr, "Error writing buffers %d\n",
				errno);
		else
			meta_cluster_init(md, start);
	}
	pthread_mutex_unlock(&md->mutex);
	return ret;
}
888
889 static int add_extent(u64 start, u64 size, struct metadump_struct *md,
890                       int data)
891 {
892         int ret;
893         if (md->data != data ||
894             md->pending_size + size > MAX_PENDING_SIZE ||
895             md->pending_start + md->pending_size != start) {
896                 ret = flush_pending(md, 0);
897                 if (ret)
898                         return ret;
899                 md->pending_start = start;
900         }
901         readahead_tree_block(md->root, start, size, 0);
902         md->pending_size += size;
903         md->data = data;
904         return 0;
905 }
906
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Decide whether the v0 extent at @bytenr is a tree block by scanning the
 * EXTENT_REF_V0 items that follow @path's current slot: a ref owned by a
 * tree root (objectid below BTRFS_FIRST_FREE_OBJECTID) marks it metadata.
 * Returns 1 for a tree block, 0 for data, negative on error.
 */
static int is_tree_block(struct btrfs_root *extent_root,
			 struct btrfs_path *path, u64 bytenr)
{
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 ref_objectid;
	int ret;

	leaf = path->nodes[0];
	while (1) {
		struct btrfs_extent_ref_v0 *ref_item;
		path->slots[0]++;
		/* advance to the next leaf when this one runs out */
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				return ret;
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		/* past the refs for this extent */
		if (key.objectid != bytenr)
			break;
		if (key.type != BTRFS_EXTENT_REF_V0_KEY)
			continue;
		ref_item = btrfs_item_ptr(leaf, path->slots[0],
					  struct btrfs_extent_ref_v0);
		ref_objectid = btrfs_ref_objectid_v0(leaf, ref_item);
		if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID)
			return 1;
		break;
	}
	return 0;
}
#endif
943
/*
 * Recursively add every block of the tree rooted at @eb to the metadump.
 *
 * When @root_tree is set, @eb belongs to a tree of root items: leaf
 * items of type BTRFS_ROOT_ITEM_KEY are followed into the trees they
 * reference, which are then copied in full.  Otherwise only @eb's own
 * nodes and leaves are added.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int copy_tree_blocks(struct btrfs_root *root, struct extent_buffer *eb,
                            struct metadump_struct *metadump, int root_tree)
{
        struct extent_buffer *tmp;
        struct btrfs_root_item *ri;
        struct btrfs_key key;
        u64 bytenr;
        int level;
        int nritems = 0;
        int i = 0;
        int ret;

        /* Record this block itself (as metadata, data == 0) */
        ret = add_extent(btrfs_header_bytenr(eb), root->leafsize, metadump, 0);
        if (ret) {
                fprintf(stderr, "Error adding metadata block\n");
                return ret;
        }

        /* A leaf of an ordinary tree has nothing to descend into */
        if (btrfs_header_level(eb) == 0 && !root_tree)
                return 0;

        level = btrfs_header_level(eb);
        nritems = btrfs_header_nritems(eb);
        for (i = 0; i < nritems; i++) {
                if (level == 0) {
                        /* Root-tree leaf: follow each root item into its tree */
                        btrfs_item_key_to_cpu(eb, &key, i);
                        if (key.type != BTRFS_ROOT_ITEM_KEY)
                                continue;
                        ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
                        bytenr = btrfs_disk_root_bytenr(eb, ri);
                        tmp = read_tree_block(root, bytenr, root->leafsize, 0);
                        if (!tmp) {
                                fprintf(stderr,
                                        "Error reading log root block\n");
                                return -EIO;
                        }
                        /* Referenced tree is not itself a root tree */
                        ret = copy_tree_blocks(root, tmp, metadump, 0);
                        free_extent_buffer(tmp);
                        if (ret)
                                return ret;
                } else {
                        /* Interior node: recurse into each child block */
                        bytenr = btrfs_node_blockptr(eb, i);
                        tmp = read_tree_block(root, bytenr, root->leafsize, 0);
                        if (!tmp) {
                                fprintf(stderr, "Error reading log block\n");
                                return -EIO;
                        }
                        ret = copy_tree_blocks(root, tmp, metadump, root_tree);
                        free_extent_buffer(tmp);
                        if (ret)
                                return ret;
                }
        }

        return 0;
}
1000
1001 static int copy_log_trees(struct btrfs_root *root,
1002                           struct metadump_struct *metadump,
1003                           struct btrfs_path *path)
1004 {
1005         u64 blocknr = btrfs_super_log_root(root->fs_info->super_copy);
1006
1007         if (blocknr == 0)
1008                 return 0;
1009
1010         if (!root->fs_info->log_root_tree ||
1011             !root->fs_info->log_root_tree->node) {
1012                 fprintf(stderr, "Error copying tree log, it wasn't setup\n");
1013                 return -EIO;
1014         }
1015
1016         return copy_tree_blocks(root, root->fs_info->log_root_tree->node,
1017                                 metadump, 1);
1018 }
1019
/*
 * Add the on-disk extents of the free space cache inodes to the
 * metadump.  Space cache contents are stored as EXTENT_DATA items in
 * the tree root, so walk it from key (0, EXTENT_DATA, 0) and queue
 * every regular file extent found (as data, so its contents are
 * treated as data by the dump side).
 *
 * Returns 0 on success, negative errno on failure.
 */
static int copy_space_cache(struct btrfs_root *root,
                            struct metadump_struct *metadump,
                            struct btrfs_path *path)
{
        struct extent_buffer *leaf;
        struct btrfs_file_extent_item *fi;
        struct btrfs_key key;
        u64 bytenr, num_bytes;
        int ret;

        /* Space cache inodes live in the tree root */
        root = root->fs_info->tree_root;

        key.objectid = 0;
        key.type = BTRFS_EXTENT_DATA_KEY;
        key.offset = 0;

        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0) {
                fprintf(stderr, "Error searching for free space inode %d\n",
                        ret);
                return ret;
        }

        while (1) {
                leaf = path->nodes[0];
                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(root, path);
                        if (ret < 0) {
                                fprintf(stderr, "Error going to next leaf "
                                        "%d\n", ret);
                                return ret;
                        }
                        /* ret > 0 means no more leaves: done */
                        if (ret > 0)
                                break;
                        leaf = path->nodes[0];
                }

                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                if (key.type != BTRFS_EXTENT_DATA_KEY) {
                        path->slots[0]++;
                        continue;
                }

                fi = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_file_extent_item);
                /* Only on-disk (REG) extents have blocks to copy; inline
                 * data lives in the leaf itself, which is already dumped */
                if (btrfs_file_extent_type(leaf, fi) !=
                    BTRFS_FILE_EXTENT_REG) {
                        path->slots[0]++;
                        continue;
                }

                bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
                num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
                ret = add_extent(bytenr, num_bytes, metadump, 1);
                if (ret) {
                        fprintf(stderr, "Error adding space cache blocks %d\n",
                                ret);
                        btrfs_release_path(root, path);
                        return ret;
                }
                path->slots[0]++;
        }

        return 0;
}
1085
1086 static int copy_from_extent_tree(struct metadump_struct *metadump,
1087                                  struct btrfs_path *path)
1088 {
1089         struct btrfs_root *extent_root;
1090         struct extent_buffer *leaf;
1091         struct btrfs_extent_item *ei;
1092         struct btrfs_key key;
1093         u64 bytenr;
1094         u64 num_bytes;
1095         int ret;
1096
1097         extent_root = metadump->root->fs_info->extent_root;
1098         bytenr = BTRFS_SUPER_INFO_OFFSET + 4096;
1099         key.objectid = bytenr;
1100         key.type = BTRFS_EXTENT_ITEM_KEY;
1101         key.offset = 0;
1102
1103         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1104         if (ret < 0) {
1105                 fprintf(stderr, "Error searching extent root %d\n", ret);
1106                 return ret;
1107         }
1108         ret = 0;
1109
1110         while (1) {
1111                 leaf = path->nodes[0];
1112                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1113                         ret = btrfs_next_leaf(extent_root, path);
1114                         if (ret < 0) {
1115                                 fprintf(stderr, "Error going to next leaf %d"
1116                                         "\n", ret);
1117                                 break;
1118                         }
1119                         if (ret > 0) {
1120                                 ret = 0;
1121                                 break;
1122                         }
1123                         leaf = path->nodes[0];
1124                 }
1125
1126                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1127                 if (key.objectid < bytenr ||
1128                     (key.type != BTRFS_EXTENT_ITEM_KEY &&
1129                      key.type != BTRFS_METADATA_ITEM_KEY)) {
1130                         path->slots[0]++;
1131                         continue;
1132                 }
1133
1134                 bytenr = key.objectid;
1135                 if (key.type == BTRFS_METADATA_ITEM_KEY)
1136                         num_bytes = key.offset;
1137                 else
1138                         num_bytes = extent_root->leafsize;
1139
1140                 if (btrfs_item_size_nr(leaf, path->slots[0]) > sizeof(*ei)) {
1141                         ei = btrfs_item_ptr(leaf, path->slots[0],
1142                                             struct btrfs_extent_item);
1143                         if (btrfs_extent_flags(leaf, ei) &
1144                             BTRFS_EXTENT_FLAG_TREE_BLOCK) {
1145                                 ret = add_extent(bytenr, num_bytes, metadump,
1146                                                  0);
1147                                 if (ret) {
1148                                         fprintf(stderr, "Error adding block "
1149                                                 "%d\n", ret);
1150                                         break;
1151                                 }
1152                         }
1153                 } else {
1154 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1155                         ret = is_tree_block(extent_root, path, bytenr);
1156                         if (ret < 0) {
1157                                 fprintf(stderr, "Error checking tree block "
1158                                         "%d\n", ret);
1159                                 break;
1160                         }
1161
1162                         if (ret) {
1163                                 ret = add_extent(bytenr, num_bytes, metadump,
1164                                                  0);
1165                                 if (ret) {
1166                                         fprintf(stderr, "Error adding block "
1167                                                 "%d\n", ret);
1168                                         break;
1169                                 }
1170                         }
1171                         ret = 0;
1172 #else
1173                         fprintf(stderr, "Either extent tree corruption or "
1174                                 "you haven't built with V0 support\n");
1175                         ret = -EIO;
1176                         break;
1177 #endif
1178                 }
1179                 bytenr += num_bytes;
1180         }
1181
1182         btrfs_release_path(extent_root, path);
1183
1184         return ret;
1185 }
1186
/*
 * Create a metadump image of the filesystem on @input and write it to
 * @out.
 *
 * @num_threads and @compress_level configure the compression worker
 * pool; @sanitize is forwarded to metadump_init (presumably controls
 * file name sanitizing — confirm in metadump_init); @walk_trees
 * selects walking the chunk and root trees directly instead of
 * scanning the extent tree for tree blocks.
 *
 * Returns 0 on success, the first error encountered otherwise.
 */
static int create_metadump(const char *input, FILE *out, int num_threads,
                           int compress_level, int sanitize, int walk_trees)
{
        struct btrfs_root *root;
        struct btrfs_path *path = NULL;
        struct metadump_struct metadump;
        int ret;
        int err = 0;

        root = open_ctree(input, 0, 0);
        if (!root) {
                fprintf(stderr, "Open ctree failed\n");
                return -EIO;
        }

        /* The dump code assumes a single metadata block size throughout */
        BUG_ON(root->nodesize != root->leafsize);

        ret = metadump_init(&metadump, root, out, num_threads,
                            compress_level, sanitize);
        if (ret) {
                fprintf(stderr, "Error initing metadump %d\n", ret);
                close_ctree(root);
                return ret;
        }

        /* Always dump the primary super block first */
        ret = add_extent(BTRFS_SUPER_INFO_OFFSET, 4096, &metadump, 0);
        if (ret) {
                fprintf(stderr, "Error adding metadata %d\n", ret);
                err = ret;
                goto out;
        }

        path = btrfs_alloc_path();
        if (!path) {
                fprintf(stderr, "Out of memory allocing path\n");
                err = -ENOMEM;
                goto out;
        }

        if (walk_trees) {
                /* Walk the trees themselves: the chunk tree plus
                 * everything reachable from the root tree */
                ret = copy_tree_blocks(root, root->fs_info->chunk_root->node,
                                       &metadump, 1);
                if (ret) {
                        err = ret;
                        goto out;
                }

                ret = copy_tree_blocks(root, root->fs_info->tree_root->node,
                                       &metadump, 1);
                if (ret) {
                        err = ret;
                        goto out;
                }
        } else {
                /* Default: find tree blocks via the extent tree */
                ret = copy_from_extent_tree(&metadump, path);
                if (ret) {
                        err = ret;
                        goto out;
                }
        }

        ret = copy_log_trees(root, &metadump, path);
        if (ret) {
                err = ret;
                goto out;
        }

        ret = copy_space_cache(root, &metadump, path);
out:
        /* Flush even on error so the output stream gets terminated */
        ret = flush_pending(&metadump, 1);
        if (ret) {
                if (!err)
                        err = ret;
                fprintf(stderr, "Error flushing pending %d\n", ret);
        }

        metadump_destroy(&metadump);

        btrfs_free_path(path);
        ret = close_ctree(root);
        /* The first error wins; otherwise report close_ctree's result */
        return err ? err : ret;
}
1269
1270 static void update_super_old(u8 *buffer)
1271 {
1272         struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
1273         struct btrfs_chunk *chunk;
1274         struct btrfs_disk_key *key;
1275         u32 sectorsize = btrfs_super_sectorsize(super);
1276         u64 flags = btrfs_super_flags(super);
1277
1278         flags |= BTRFS_SUPER_FLAG_METADUMP;
1279         btrfs_set_super_flags(super, flags);
1280
1281         key = (struct btrfs_disk_key *)(super->sys_chunk_array);
1282         chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
1283                                        sizeof(struct btrfs_disk_key));
1284
1285         btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
1286         btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
1287         btrfs_set_disk_key_offset(key, 0);
1288
1289         btrfs_set_stack_chunk_length(chunk, (u64)-1);
1290         btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
1291         btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024);
1292         btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
1293         btrfs_set_stack_chunk_io_align(chunk, sectorsize);
1294         btrfs_set_stack_chunk_io_width(chunk, sectorsize);
1295         btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
1296         btrfs_set_stack_chunk_num_stripes(chunk, 1);
1297         btrfs_set_stack_chunk_sub_stripes(chunk, 0);
1298         chunk->stripe.devid = super->dev_item.devid;
1299         chunk->stripe.offset = cpu_to_le64(0);
1300         memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
1301         btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
1302         csum_block(buffer, 4096);
1303 }
1304
/*
 * Rewrite the super block's sys chunk array for a restored image:
 * every chunk is collapsed to a single stripe on this device at the
 * chunk's logical offset, and the array is compacted in place
 * (multi-stripe chunk items shrink to one-stripe size).
 *
 * @buffer holds the 4k super block; it is re-checksummed on success.
 * Returns 0 on success, -EIO on an unexpected key in the array.
 */
static int update_super(u8 *buffer)
{
        struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
        struct btrfs_chunk *chunk;
        struct btrfs_disk_key *disk_key;
        struct btrfs_key key;
        u32 new_array_size = 0;
        u32 array_size;
        u32 cur = 0;
        u32 new_cur = 0;
        u8 *ptr, *write_ptr;
        int old_num_stripes;

        /* ptr reads the old array, write_ptr emits the compacted one;
         * write_ptr never gets ahead of ptr, so in-place is safe */
        write_ptr = ptr = super->sys_chunk_array;
        array_size = btrfs_super_sys_array_size(super);

        while (cur < array_size) {
                disk_key = (struct btrfs_disk_key *)ptr;
                btrfs_disk_key_to_cpu(&key, disk_key);

                new_array_size += sizeof(*disk_key);
                memmove(write_ptr, ptr, sizeof(*disk_key));

                write_ptr += sizeof(*disk_key);
                ptr += sizeof(*disk_key);
                cur += sizeof(*disk_key);
                new_cur += sizeof(*disk_key);

                if (key.type == BTRFS_CHUNK_ITEM_KEY) {
                        /* Read the stripe count before the chunk is
                         * overwritten in place */
                        chunk = (struct btrfs_chunk *)ptr;
                        old_num_stripes = btrfs_stack_chunk_num_stripes(chunk);
                        chunk = (struct btrfs_chunk *)write_ptr;

                        /* sizeof(*chunk) covers exactly one stripe */
                        memmove(write_ptr, ptr, sizeof(*chunk));
                        btrfs_set_stack_chunk_num_stripes(chunk, 1);
                        btrfs_set_stack_chunk_sub_stripes(chunk, 0);
                        btrfs_set_stack_chunk_type(chunk,
                                                   BTRFS_BLOCK_GROUP_SYSTEM);
                        /* Identity mapping: stripe offset == chunk offset */
                        chunk->stripe.devid = super->dev_item.devid;
                        chunk->stripe.offset = cpu_to_le64(key.offset);
                        memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid,
                               BTRFS_UUID_SIZE);
                        new_array_size += sizeof(*chunk);
                        new_cur += sizeof(*chunk);
                } else {
                        fprintf(stderr, "Bogus key in the sys chunk array "
                                "%d\n", key.type);
                        return -EIO;
                }
                /* Advance the write side past the one-stripe chunk we
                 * emitted, the read side past the original item */
                write_ptr += sizeof(*chunk);
                ptr += btrfs_chunk_item_size(old_num_stripes);
                cur += btrfs_chunk_item_size(old_num_stripes);
        }

        btrfs_set_super_sys_array_size(super, new_array_size);
        csum_block(buffer, 4096);

        return 0;
}
1364
1365 static struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size)
1366 {
1367         struct extent_buffer *eb;
1368
1369         eb = malloc(sizeof(struct extent_buffer) + size);
1370         if (!eb)
1371                 return NULL;
1372         memset(eb, 0, sizeof(struct extent_buffer) + size);
1373
1374         eb->start = bytenr;
1375         eb->len = size;
1376         return eb;
1377 }
1378
/*
 * Shrink the item at @slot in leaf @eb to @new_size bytes, in place.
 *
 * Item data in a btrfs leaf is packed at the end of the leaf and grows
 * toward the header, so shrinking an item means sliding the data of
 * this item and every later one toward the end of the leaf by the
 * size difference, then bumping the stored offsets to match.
 * NOTE(review): appears to mirror the kernel's btrfs_truncate_item —
 * confirm if keeping them in sync matters.
 */
static void truncate_item(struct extent_buffer *eb, int slot, u32 new_size)
{
        struct btrfs_item *item;
        u32 nritems;
        u32 old_size;
        u32 old_data_start;
        u32 size_diff;
        u32 data_end;
        int i;

        old_size = btrfs_item_size_nr(eb, slot);
        if (old_size == new_size)
                return;

        nritems = btrfs_header_nritems(eb);
        /* The last item's data sits lowest in the leaf */
        data_end = btrfs_item_offset_nr(eb, nritems - 1);

        old_data_start = btrfs_item_offset_nr(eb, slot);
        size_diff = old_size - new_size;

        /* Shift the offsets of this item and everything after it */
        for (i = slot; i < nritems; i++) {
                u32 ioff;
                item = btrfs_item_nr(eb, i);
                ioff = btrfs_item_offset(eb, item);
                btrfs_set_item_offset(eb, item, ioff + size_diff);
        }

        /* Slide the data of items [slot..nritems) up by size_diff */
        memmove_extent_buffer(eb, btrfs_leaf_data(eb) + data_end + size_diff,
                              btrfs_leaf_data(eb) + data_end,
                              old_data_start + new_size - data_end);
        item = btrfs_item_nr(eb, slot);
        btrfs_set_item_size(eb, item, new_size);
}
1412
1413 static int fixup_chunk_tree_block(struct mdrestore_struct *mdres,
1414                                   struct async_work *async, u8 *buffer,
1415                                   size_t size)
1416 {
1417         struct extent_buffer *eb;
1418         size_t size_left = size;
1419         u64 bytenr = async->start;
1420         int i;
1421
1422         if (size_left % mdres->leafsize)
1423                 return 0;
1424
1425         eb = alloc_dummy_eb(bytenr, mdres->leafsize);
1426         if (!eb)
1427                 return -ENOMEM;
1428
1429         while (size_left) {
1430                 eb->start = bytenr;
1431                 memcpy(eb->data, buffer, mdres->leafsize);
1432
1433                 if (btrfs_header_bytenr(eb) != bytenr)
1434                         break;
1435                 if (memcmp(mdres->fsid,
1436                            eb->data + offsetof(struct btrfs_header, fsid),
1437                            BTRFS_FSID_SIZE))
1438                         break;
1439
1440                 if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID)
1441                         goto next;
1442
1443                 if (btrfs_header_level(eb) != 0)
1444                         goto next;
1445
1446                 for (i = 0; i < btrfs_header_nritems(eb); i++) {
1447                         struct btrfs_chunk chunk;
1448                         struct btrfs_key key;
1449                         u64 type;
1450
1451                         btrfs_item_key_to_cpu(eb, &key, i);
1452                         if (key.type != BTRFS_CHUNK_ITEM_KEY)
1453                                 continue;
1454                         truncate_item(eb, i, sizeof(chunk));
1455                         read_extent_buffer(eb, &chunk,
1456                                            btrfs_item_ptr_offset(eb, i),
1457                                            sizeof(chunk));
1458
1459                         /* Zero out the RAID profile */
1460                         type = btrfs_stack_chunk_type(&chunk);
1461                         type &= (BTRFS_BLOCK_GROUP_DATA |
1462                                  BTRFS_BLOCK_GROUP_SYSTEM |
1463                                  BTRFS_BLOCK_GROUP_METADATA);
1464                         btrfs_set_stack_chunk_type(&chunk, type);
1465
1466                         btrfs_set_stack_chunk_num_stripes(&chunk, 1);
1467                         btrfs_set_stack_chunk_sub_stripes(&chunk, 0);
1468                         btrfs_set_stack_stripe_devid(&chunk.stripe, mdres->devid);
1469                         btrfs_set_stack_stripe_offset(&chunk.stripe, key.offset);
1470                         memcpy(chunk.stripe.dev_uuid, mdres->uuid,
1471                                BTRFS_UUID_SIZE);
1472                         write_extent_buffer(eb, &chunk,
1473                                             btrfs_item_ptr_offset(eb, i),
1474                                             sizeof(chunk));
1475                 }
1476                 memcpy(buffer, eb->data, eb->len);
1477                 csum_block(buffer, eb->len);
1478 next:
1479                 size_left -= mdres->leafsize;
1480                 buffer += mdres->leafsize;
1481                 bytenr += mdres->leafsize;
1482         }
1483
1484         return 0;
1485 }
1486
1487 static void write_backup_supers(int fd, u8 *buf)
1488 {
1489         struct stat st;
1490         u64 size;
1491         u64 bytenr;
1492         int i;
1493         int ret;
1494
1495         if (fstat(fd, &st)) {
1496                 fprintf(stderr, "Couldn't stat restore point, won't be able "
1497                         "to write backup supers: %d\n", errno);
1498                 return;
1499         }
1500
1501         size = btrfs_device_size(fd, &st);
1502
1503         for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1504                 bytenr = btrfs_sb_offset(i);
1505                 if (bytenr + 4096 > size)
1506                         break;
1507                 ret = pwrite64(fd, buf, 4096, bytenr);
1508                 if (ret < 4096) {
1509                         if (ret < 0)
1510                                 fprintf(stderr, "Problem writing out backup "
1511                                         "super block %d, err %d\n", i, errno);
1512                         else
1513                                 fprintf(stderr, "Short write writing out "
1514                                         "backup super block\n");
1515                         break;
1516                 }
1517         }
1518 }
1519
1520 static void *restore_worker(void *data)
1521 {
1522         struct mdrestore_struct *mdres = (struct mdrestore_struct *)data;
1523         struct async_work *async;
1524         size_t size;
1525         u8 *buffer;
1526         u8 *outbuf;
1527         int outfd;
1528         int ret;
1529
1530         outfd = fileno(mdres->out);
1531         buffer = malloc(MAX_PENDING_SIZE * 2);
1532         if (!buffer) {
1533                 fprintf(stderr, "Error allocing buffer\n");
1534                 pthread_mutex_lock(&mdres->mutex);
1535                 if (!mdres->error)
1536                         mdres->error = -ENOMEM;
1537                 pthread_mutex_unlock(&mdres->mutex);
1538                 goto out;
1539         }
1540
1541         while (1) {
1542                 int err = 0;
1543
1544                 pthread_mutex_lock(&mdres->mutex);
1545                 while (!mdres->leafsize || list_empty(&mdres->list)) {
1546                         if (mdres->done) {
1547                                 pthread_mutex_unlock(&mdres->mutex);
1548                                 goto out;
1549                         }
1550                         pthread_cond_wait(&mdres->cond, &mdres->mutex);
1551                 }
1552                 async = list_entry(mdres->list.next, struct async_work, list);
1553                 list_del_init(&async->list);
1554                 pthread_mutex_unlock(&mdres->mutex);
1555
1556                 if (mdres->compress_method == COMPRESS_ZLIB) {
1557                         size = MAX_PENDING_SIZE * 2;
1558                         ret = uncompress(buffer, (unsigned long *)&size,
1559                                          async->buffer, async->bufsize);
1560                         if (ret != Z_OK) {
1561                                 fprintf(stderr, "Error decompressing %d\n",
1562                                         ret);
1563                                 err = -EIO;
1564                         }
1565                         outbuf = buffer;
1566                 } else {
1567                         outbuf = async->buffer;
1568                         size = async->bufsize;
1569                 }
1570
1571                 if (async->start == BTRFS_SUPER_INFO_OFFSET) {
1572                         if (mdres->old_restore) {
1573                                 update_super_old(outbuf);
1574                         } else {
1575                                 ret = update_super(outbuf);
1576                                 if (ret)
1577                                         err = ret;
1578                         }
1579                 } else if (!mdres->old_restore) {
1580                         ret = fixup_chunk_tree_block(mdres, async, outbuf, size);
1581                         if (ret)
1582                                 err = ret;
1583                 }
1584
1585                 ret = pwrite64(outfd, outbuf, size, async->start);
1586                 if (ret < size) {
1587                         if (ret < 0) {
1588                                 fprintf(stderr, "Error writing to device %d\n",
1589                                         errno);
1590                                 err = errno;
1591                         } else {
1592                                 fprintf(stderr, "Short write\n");
1593                                 err = -EIO;
1594                         }
1595                 }
1596
1597                 if (async->start == BTRFS_SUPER_INFO_OFFSET)
1598                         write_backup_supers(outfd, outbuf);
1599
1600                 pthread_mutex_lock(&mdres->mutex);
1601                 if (err && !mdres->error)
1602                         mdres->error = err;
1603                 mdres->num_items--;
1604                 pthread_mutex_unlock(&mdres->mutex);
1605
1606                 free(async->buffer);
1607                 free(async);
1608         }
1609 out:
1610         free(buffer);
1611         pthread_exit(NULL);
1612 }
1613
1614 static void mdrestore_destroy(struct mdrestore_struct *mdres)
1615 {
1616         int i;
1617         pthread_mutex_lock(&mdres->mutex);
1618         mdres->done = 1;
1619         pthread_cond_broadcast(&mdres->cond);
1620         pthread_mutex_unlock(&mdres->mutex);
1621
1622         for (i = 0; i < mdres->num_threads; i++)
1623                 pthread_join(mdres->threads[i], NULL);
1624
1625         pthread_cond_destroy(&mdres->cond);
1626         pthread_mutex_destroy(&mdres->mutex);
1627         free(mdres->threads);
1628 }
1629
1630 static int mdrestore_init(struct mdrestore_struct *mdres,
1631                           FILE *in, FILE *out, int old_restore,
1632                           int num_threads)
1633 {
1634         int i, ret = 0;
1635
1636         memset(mdres, 0, sizeof(*mdres));
1637         pthread_cond_init(&mdres->cond, NULL);
1638         pthread_mutex_init(&mdres->mutex, NULL);
1639         INIT_LIST_HEAD(&mdres->list);
1640         mdres->in = in;
1641         mdres->out = out;
1642         mdres->old_restore = old_restore;
1643
1644         if (!num_threads)
1645                 return 0;
1646
1647         mdres->num_threads = num_threads;
1648         mdres->threads = calloc(num_threads, sizeof(pthread_t));
1649         if (!mdres->threads)
1650                 return -ENOMEM;
1651         for (i = 0; i < num_threads; i++) {
1652                 ret = pthread_create(mdres->threads + i, NULL, restore_worker,
1653                                      mdres);
1654                 if (ret)
1655                         break;
1656         }
1657         if (ret)
1658                 mdrestore_destroy(mdres);
1659         return ret;
1660 }
1661
/*
 * Extract the fields the restore workers need from the super block
 * image carried by @async and cache them in @mdres: leafsize, fsid,
 * the device uuid and the devid.
 *
 * When the stream is zlib compressed the payload is decompressed into
 * a temporary buffer first.  Returns 0 on success, -ENOMEM or -EIO on
 * failure.
 */
static int fill_mdres_info(struct mdrestore_struct *mdres,
                           struct async_work *async)
{
        struct btrfs_super_block *super;
        u8 *buffer = NULL;
        u8 *outbuf;
        int ret;

        if (mdres->compress_method == COMPRESS_ZLIB) {
                size_t size = MAX_PENDING_SIZE * 2;

                buffer = malloc(MAX_PENDING_SIZE * 2);
                if (!buffer)
                        return -ENOMEM;
                /* NOTE(review): casting size_t * to unsigned long * assumes
                 * the two types share a representation — true on common
                 * ILP32/LP64 targets, confirm for others */
                ret = uncompress(buffer, (unsigned long *)&size,
                                 async->buffer, async->bufsize);
                if (ret != Z_OK) {
                        fprintf(stderr, "Error decompressing %d\n", ret);
                        free(buffer);
                        return -EIO;
                }
                outbuf = buffer;
        } else {
                outbuf = async->buffer;
        }

        super = (struct btrfs_super_block *)outbuf;
        mdres->leafsize = btrfs_super_leafsize(super);
        memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE);
        memcpy(mdres->uuid, super->dev_item.uuid,
                       BTRFS_UUID_SIZE);
        mdres->devid = le64_to_cpu(super->dev_item.devid);
        /* free(NULL) is a no-op when the stream wasn't compressed */
        free(buffer);
        return 0;
}
1697
/*
 * Parse one on-disk cluster of the metadump image and queue its items
 * for the restore worker threads.
 *
 * @cluster: a BLOCK_SIZE buffer already filled with the cluster header
 *	     and item index
 * @mdres: restore context; items are appended to mdres->list, which the
 *	   workers drain
 * @next: set to the file offset just past this cluster's payload,
 *	  rounded up to the next BLOCK_SIZE boundary
 *
 * Each item's payload is read from mdres->in into a freshly allocated
 * async_work; ownership of queued items passes to the workers.  The
 * super block item additionally initializes the restore parameters.
 * Returns 0 on success, -ENOMEM or -EIO on failure.
 */
static int add_cluster(struct meta_cluster *cluster,
		       struct mdrestore_struct *mdres, u64 *next)
{
	struct meta_cluster_item *item;
	struct meta_cluster_header *header = &cluster->header;
	struct async_work *async;
	u64 bytenr;
	u32 i, nritems;
	int ret;

	/* the previous cluster must be fully drained before a new one */
	BUG_ON(mdres->num_items);
	mdres->compress_method = header->compress;

	/* item payloads start right after the BLOCK_SIZE header block */
	bytenr = le64_to_cpu(header->bytenr) + BLOCK_SIZE;
	nritems = le32_to_cpu(header->nritems);
	for (i = 0; i < nritems; i++) {
		item = &cluster->items[i];
		async = calloc(1, sizeof(*async));
		if (!async) {
			fprintf(stderr, "Error allocating async\n");
			return -ENOMEM;
		}
		async->start = le64_to_cpu(item->bytenr);
		async->bufsize = le32_to_cpu(item->size);
		async->buffer = malloc(async->bufsize);
		if (!async->buffer) {
			fprintf(stderr, "Error allocing async buffer\n");
			free(async);
			return -ENOMEM;
		}
		ret = fread(async->buffer, async->bufsize, 1, mdres->in);
		if (ret != 1) {
			fprintf(stderr, "Error reading buffer %d\n", errno);
			free(async->buffer);
			free(async);
			return -EIO;
		}
		bytenr += async->bufsize;

		pthread_mutex_lock(&mdres->mutex);
		if (async->start == BTRFS_SUPER_INFO_OFFSET) {
			/*
			 * The super block carries leafsize/fsid/devid the
			 * fixup code needs; extract them while holding the
			 * mutex so workers see a consistent mdres.
			 */
			ret = fill_mdres_info(mdres, async);
			if (ret) {
				fprintf(stderr, "Error setting up restore\n");
				pthread_mutex_unlock(&mdres->mutex);
				free(async->buffer);
				free(async);
				return ret;
			}
		}
		/* hand the item to the workers and wake one up */
		list_add_tail(&async->list, &mdres->list);
		mdres->num_items++;
		pthread_cond_signal(&mdres->cond);
		pthread_mutex_unlock(&mdres->mutex);
	}
	/* consume the padding up to the next BLOCK_SIZE boundary */
	if (bytenr & BLOCK_MASK) {
		/* padding is at most BLOCK_SIZE - 1 == BLOCK_MASK bytes */
		char buffer[BLOCK_MASK];
		size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);

		bytenr += size;
		ret = fread(buffer, size, 1, mdres->in);
		if (ret != 1) {
			fprintf(stderr, "Error reading in buffer %d\n", errno);
			return -EIO;
		}
	}
	*next = bytenr;
	return 0;
}
1767
1768 static int wait_for_worker(struct mdrestore_struct *mdres)
1769 {
1770         int ret = 0;
1771
1772         pthread_mutex_lock(&mdres->mutex);
1773         ret = mdres->error;
1774         while (!ret && mdres->num_items > 0) {
1775                 struct timespec ts = {
1776                         .tv_sec = 0,
1777                         .tv_nsec = 10000000,
1778                 };
1779                 pthread_mutex_unlock(&mdres->mutex);
1780                 nanosleep(&ts, NULL);
1781                 pthread_mutex_lock(&mdres->mutex);
1782                 ret = mdres->error;
1783         }
1784         pthread_mutex_unlock(&mdres->mutex);
1785         return ret;
1786 }
1787
1788 static int restore_metadump(const char *input, FILE *out, int old_restore,
1789                             int num_threads)
1790 {
1791         struct meta_cluster *cluster = NULL;
1792         struct meta_cluster_header *header;
1793         struct mdrestore_struct mdrestore;
1794         u64 bytenr = 0;
1795         FILE *in = NULL;
1796         int ret = 0;
1797
1798         if (!strcmp(input, "-")) {
1799                 in = stdin;
1800         } else {
1801                 in = fopen(input, "r");
1802                 if (!in) {
1803                         perror("unable to open metadump image");
1804                         return 1;
1805                 }
1806         }
1807
1808         cluster = malloc(BLOCK_SIZE);
1809         if (!cluster) {
1810                 fprintf(stderr, "Error allocating cluster\n");
1811                 if (in != stdin)
1812                         fclose(in);
1813                 return -ENOMEM;
1814         }
1815
1816         ret = mdrestore_init(&mdrestore, in, out, old_restore, num_threads);
1817         if (ret) {
1818                 fprintf(stderr, "Error initing mdrestore %d\n", ret);
1819                 if (in != stdin)
1820                         fclose(in);
1821                 free(cluster);
1822                 return ret;
1823         }
1824
1825         while (1) {
1826                 ret = fread(cluster, BLOCK_SIZE, 1, in);
1827                 if (!ret)
1828                         break;
1829
1830                 header = &cluster->header;
1831                 if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
1832                     le64_to_cpu(header->bytenr) != bytenr) {
1833                         fprintf(stderr, "bad header in metadump image\n");
1834                         ret = -EIO;
1835                         break;
1836                 }
1837                 ret = add_cluster(cluster, &mdrestore, &bytenr);
1838                 if (ret) {
1839                         fprintf(stderr, "Error adding cluster\n");
1840                         break;
1841                 }
1842
1843                 ret = wait_for_worker(&mdrestore);
1844                 if (ret) {
1845                         fprintf(stderr, "One of the threads errored out %d\n",
1846                                 ret);
1847                         break;
1848                 }
1849         }
1850
1851         mdrestore_destroy(&mdrestore);
1852         free(cluster);
1853         if (in != stdin)
1854                 fclose(in);
1855         return ret;
1856 }
1857
/*
 * Print command line usage to stderr and exit with status 1.
 */
static void print_usage(void)
{
	fprintf(stderr, "usage: btrfs-image [options] source target\n");
	fprintf(stderr, "\t-r      \trestore metadump image\n");
	fprintf(stderr, "\t-c value\tcompression level (0 ~ 9)\n");
	fprintf(stderr, "\t-t value\tnumber of threads (1 ~ 32)\n");
	fprintf(stderr, "\t-o      \tdon't mess with the chunk tree when restoring\n");
	/* was missing its trailing newline, running into the -w line */
	fprintf(stderr, "\t-s      \tsanitize file names, use once to just use garbage, use twice if you want crc collisions\n");
	fprintf(stderr, "\t-w      \twalk all trees instead of using extent tree, do this if your extent tree is broken\n");
	exit(1);
}
1869
1870 int main(int argc, char *argv[])
1871 {
1872         char *source;
1873         char *target;
1874         int num_threads = 0;
1875         int compress_level = 0;
1876         int create = 1;
1877         int old_restore = 0;
1878         int walk_trees = 0;
1879         int ret;
1880         int sanitize = 0;
1881         FILE *out;
1882
1883         while (1) {
1884                 int c = getopt(argc, argv, "rc:t:osw");
1885                 if (c < 0)
1886                         break;
1887                 switch (c) {
1888                 case 'r':
1889                         create = 0;
1890                         break;
1891                 case 't':
1892                         num_threads = atoi(optarg);
1893                         if (num_threads <= 0 || num_threads > 32)
1894                                 print_usage();
1895                         break;
1896                 case 'c':
1897                         compress_level = atoi(optarg);
1898                         if (compress_level < 0 || compress_level > 9)
1899                                 print_usage();
1900                         break;
1901                 case 'o':
1902                         old_restore = 1;
1903                         break;
1904                 case 's':
1905                         sanitize++;
1906                         break;
1907                 case 'w':
1908                         walk_trees = 1;
1909                         break;
1910                 default:
1911                         print_usage();
1912                 }
1913         }
1914
1915         if (old_restore && create)
1916                 print_usage();
1917
1918         argc = argc - optind;
1919         if (argc != 2)
1920                 print_usage();
1921         source = argv[optind];
1922         target = argv[optind + 1];
1923
1924         if (create && !strcmp(target, "-")) {
1925                 out = stdout;
1926         } else {
1927                 out = fopen(target, "w+");
1928                 if (!out) {
1929                         perror("unable to create target file");
1930                         exit(1);
1931                 }
1932         }
1933
1934         if (num_threads == 0 && compress_level > 0) {
1935                 num_threads = sysconf(_SC_NPROCESSORS_ONLN);
1936                 if (num_threads <= 0)
1937                         num_threads = 1;
1938         }
1939
1940         if (create)
1941                 ret = create_metadump(source, out, num_threads,
1942                                       compress_level, sanitize, walk_trees);
1943         else
1944                 ret = restore_metadump(source, out, old_restore, 1);
1945
1946         if (out == stdout)
1947                 fflush(out);
1948         else
1949                 fclose(out);
1950
1951         return ret;
1952 }