Btrfs-progs: make btrfs-image copy the tree logs if they exist
[platform/upstream/btrfs-progs.git] / btrfs-image.c
1 /*
2  * Copyright (C) 2008 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #define _XOPEN_SOURCE 500
20 #define _GNU_SOURCE 1
21 #include <pthread.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <sys/types.h>
25 #include <sys/stat.h>
26 #include <fcntl.h>
27 #include <unistd.h>
28 #include <dirent.h>
29 #include <zlib.h>
30 #include "kerncompat.h"
31 #include "crc32c.h"
32 #include "ctree.h"
33 #include "disk-io.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "version.h"
37
38
/* magic identifying a metadump image cluster header on disk */
#define HEADER_MAGIC            0xbd5c25e27295668bULL
/* max bytes of metadata batched into one async work item */
#define MAX_PENDING_SIZE        (256 * 1024)
/* granularity of the image stream; clusters and padding align to this */
#define BLOCK_SIZE              1024
#define BLOCK_MASK              (BLOCK_SIZE - 1)

/* values for meta_cluster_header.compress */
#define COMPRESS_NONE           0
#define COMPRESS_ZLIB           1

/*
 * Index entry inside a cluster: where the following data chunk lives in
 * the original filesystem (bytenr) and how many bytes of (possibly
 * compressed) data it occupies in the image stream (size).
 * All fields little-endian; layout is part of the image format.
 */
struct meta_cluster_item {
        __le64 bytenr;
        __le32 size;
} __attribute__ ((__packed__));

/*
 * Header of one cluster in the image stream.  bytenr is the stream offset
 * of this cluster, nritems the number of meta_cluster_items that follow,
 * compress one of the COMPRESS_* values above.
 */
struct meta_cluster_header {
        __le64 magic;
        __le64 bytenr;
        __le32 nritems;
        u8 compress;
} __attribute__ ((__packed__));

/* cluster header + index items + buffers */
struct meta_cluster {
        struct meta_cluster_header header;
        struct meta_cluster_item items[];
} __attribute__ ((__packed__));

/* how many index items fit in the fixed-size cluster block */
#define ITEMS_PER_CLUSTER ((BLOCK_SIZE - sizeof(struct meta_cluster)) / \
                           sizeof(struct meta_cluster_item))
67
/*
 * One unit of work handed to the worker threads: a contiguous run of
 * metadata starting at 'start' spanning 'size' bytes.  'buffer'/'bufsize'
 * hold the (possibly compressed) bytes to be written; 'error' is set by a
 * worker on compress/write failure.  'list' queues the item for workers,
 * 'ordered' preserves output order for write_buffers().
 */
struct async_work {
        struct list_head list;
        struct list_head ordered;
        u64 start;
        u64 size;
        u8 *buffer;
        size_t bufsize;
        int error;
};
77
/*
 * State for creating a metadump image.  The mutex protects 'list',
 * 'ordered', the counters, and 'done'; workers wait on 'cond' for new
 * items.  'pending_start'/'pending_size' accumulate a contiguous run of
 * metadata not yet turned into an async_work (see add_metadata /
 * flush_pending).  'cluster' is the BLOCK_SIZE staging buffer for the
 * current cluster header + index.
 */
struct metadump_struct {
        struct btrfs_root *root;
        FILE *out;

        struct meta_cluster *cluster;

        pthread_t *threads;
        size_t num_threads;
        pthread_mutex_t mutex;
        pthread_cond_t cond;

        struct list_head list;          /* items queued for compression */
        struct list_head ordered;       /* items in on-disk output order */
        size_t num_items;               /* items in the current cluster */
        size_t num_ready;               /* items done compressing */

        u64 pending_start;              /* (u64)-1 when nothing pending */
        u64 pending_size;

        int compress_level;             /* 0 = no compression */
        int done;                       /* tells workers to exit */
};
100
/*
 * State for restoring an image.  The mutex protects 'list', 'num_items',
 * 'done' and 'error'; restore workers wait on 'cond'.  'error' records
 * the first failure seen by any worker.
 */
struct mdrestore_struct {
        FILE *in;
        FILE *out;

        pthread_t *threads;
        size_t num_threads;
        pthread_mutex_t mutex;
        pthread_cond_t cond;

        struct list_head list;          /* work queued for restore workers */
        size_t num_items;               /* in-flight items not yet written */

        int compress_method;            /* COMPRESS_* from the image header */
        int done;
        int error;                      /* first worker error, sticky */
};
117
118 static void csum_block(u8 *buf, size_t len)
119 {
120         char result[BTRFS_CRC32_SIZE];
121         u32 crc = ~(u32)0;
122         crc = crc32c(crc, buf + BTRFS_CSUM_SIZE, len - BTRFS_CSUM_SIZE);
123         btrfs_csum_final(crc, result);
124         memcpy(buf, result, BTRFS_CRC32_SIZE);
125 }
126
/*
 * zero inline extents and csum items
 *
 * Walk every item of the source leaf and blank the payloads that a
 * metadump must not leak: checksum item data and the file data embedded
 * in inline extents.  Offsets are computed against @src but applied to
 * @dst, which is the already-copied image of the same leaf.
 */
static void zero_items(u8 *dst, struct extent_buffer *src)
{
        struct btrfs_file_extent_item *fi;
        struct btrfs_item *item;
        struct btrfs_key key;
        u32 nritems = btrfs_header_nritems(src);
        size_t size;
        unsigned long ptr;
        int i, extent_type;

        for (i = 0; i < nritems; i++) {
                item = btrfs_item_nr(src, i);
                btrfs_item_key_to_cpu(src, &key, i);
                /* checksum items: wipe the entire payload */
                if (key.type == BTRFS_CSUM_ITEM_KEY) {
                        size = btrfs_item_size_nr(src, i);
                        memset(dst + btrfs_leaf_data(src) +
                               btrfs_item_offset_nr(src, i), 0, size);
                        continue;
                }
                if (key.type != BTRFS_EXTENT_DATA_KEY)
                        continue;

                fi = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
                extent_type = btrfs_file_extent_type(src, fi);
                /* only inline extents carry file data inside the leaf */
                if (extent_type != BTRFS_FILE_EXTENT_INLINE)
                        continue;

                ptr = btrfs_file_extent_inline_start(fi);
                size = btrfs_file_extent_inline_item_len(src, item);
                memset(dst + ptr, 0, size);
        }
}
162
/*
 * copy buffer and zero useless data in the buffer
 *
 * Copy @src into @dst, zero the unused gap between the item headers and
 * item data (leaf) or past the key pointers (node), strip csum/inline
 * payloads via zero_items(), then recompute the block checksum.  The
 * super block is copied verbatim.
 */
static void copy_buffer(u8 *dst, struct extent_buffer *src)
{
        int level;
        size_t size;
        u32 nritems;

        memcpy(dst, src->data, src->len);
        /* the super block is written as-is; update_super handles it later */
        if (src->start == BTRFS_SUPER_INFO_OFFSET)
                return;

        level = btrfs_header_level(src);
        nritems = btrfs_header_nritems(src);

        if (nritems == 0) {
                /* empty block: keep only the header */
                size = sizeof(struct btrfs_header);
                memset(dst + size, 0, src->len - size);
        } else if (level == 0) {
                /*
                 * leaf: zero the hole between the end of the item headers
                 * and the start of the last (lowest-offset) item's data
                 */
                size = btrfs_leaf_data(src) +
                        btrfs_item_offset_nr(src, nritems - 1) -
                        btrfs_item_nr_offset(nritems);
                memset(dst + btrfs_item_nr_offset(nritems), 0, size);
                zero_items(dst, src);
        } else {
                /* node: zero everything past the last key pointer */
                size = offsetof(struct btrfs_node, ptrs) +
                        sizeof(struct btrfs_key_ptr) * nritems;
                memset(dst + size, 0, src->len - size);
        }
        csum_block(dst, src->len);
}
195
/*
 * Worker thread for image creation: pulls async_work items off md->list
 * and, when compression is enabled, replaces each item's buffer with its
 * zlib-compressed form.  Exits when md->done is set and the queue drains.
 *
 * Fixes over the previous version:
 *  - the malloc() of the compression buffer was unchecked and a NULL
 *    pointer would have been handed straight to compress2();
 *  - compress2() was given (unsigned long *)&async->bufsize, which is
 *    broken where sizeof(size_t) != sizeof(unsigned long); use a local
 *    unsigned long and assign it back instead.
 */
static void *dump_worker(void *data)
{
        struct metadump_struct *md = (struct metadump_struct *)data;
        struct async_work *async;
        int ret;

        while (1) {
                pthread_mutex_lock(&md->mutex);
                while (list_empty(&md->list)) {
                        if (md->done) {
                                pthread_mutex_unlock(&md->mutex);
                                goto out;
                        }
                        pthread_cond_wait(&md->cond, &md->mutex);
                }
                async = list_entry(md->list.next, struct async_work, list);
                list_del_init(&async->list);
                pthread_mutex_unlock(&md->mutex);

                if (md->compress_level > 0) {
                        u8 *orig = async->buffer;
                        unsigned long destlen = compressBound(async->size);
                        u8 *dest = malloc(destlen);

                        if (!dest) {
                                /* keep the uncompressed buffer valid */
                                async->error = 1;
                        } else {
                                ret = compress2(dest, &destlen, orig,
                                                async->size,
                                                md->compress_level);
                                if (ret != Z_OK)
                                        async->error = 1;

                                async->buffer = dest;
                                async->bufsize = destlen;
                                free(orig);
                        }
                }

                /* count the item ready even on error so writers can drain */
                pthread_mutex_lock(&md->mutex);
                md->num_ready++;
                pthread_mutex_unlock(&md->mutex);
        }
out:
        pthread_exit(NULL);
}
238
239 static void meta_cluster_init(struct metadump_struct *md, u64 start)
240 {
241         struct meta_cluster_header *header;
242
243         md->num_items = 0;
244         md->num_ready = 0;
245         header = &md->cluster->header;
246         header->magic = cpu_to_le64(HEADER_MAGIC);
247         header->bytenr = cpu_to_le64(start);
248         header->nritems = cpu_to_le32(0);
249         header->compress = md->compress_level > 0 ?
250                            COMPRESS_ZLIB : COMPRESS_NONE;
251 }
252
/*
 * Initialize @md for dumping @root to @out with @num_threads worker
 * threads and the given zlib @compress_level (0 disables compression).
 *
 * Returns 0 on success or a negative errno.  On failure everything
 * allocated here is torn down; on success metadump_destroy() must be
 * called to stop the workers and free resources.
 */
static int metadump_init(struct metadump_struct *md, struct btrfs_root *root,
                         FILE *out, int num_threads, int compress_level)
{
        int i, ret;

        memset(md, 0, sizeof(*md));
        pthread_cond_init(&md->cond, NULL);
        pthread_mutex_init(&md->mutex, NULL);
        INIT_LIST_HEAD(&md->list);
        INIT_LIST_HEAD(&md->ordered);
        md->root = root;
        md->out = out;
        /* (u64)-1 marks "no pending run"; see add_metadata() */
        md->pending_start = (u64)-1;
        md->compress_level = compress_level;
        md->cluster = calloc(1, BLOCK_SIZE);
        if (!md->cluster) {
                pthread_cond_destroy(&md->cond);
                pthread_mutex_destroy(&md->mutex);
                return -ENOMEM;
        }

        meta_cluster_init(md, 0);
        /* single-threaded mode: compression happens inline, no workers */
        if (!num_threads)
                return 0;

        md->num_threads = num_threads;
        md->threads = calloc(num_threads, sizeof(pthread_t));
        if (!md->threads) {
                free(md->cluster);
                pthread_cond_destroy(&md->cond);
                pthread_mutex_destroy(&md->mutex);
                return -ENOMEM;
        }

        /* num_threads >= 1 here, so the loop always assigns ret */
        for (i = 0; i < num_threads; i++) {
                ret = pthread_create(md->threads + i, NULL, dump_worker, md);
                if (ret)
                        break;
        }

        if (ret) {
                /* wake and join the threads that did start, then tear down */
                pthread_mutex_lock(&md->mutex);
                md->done = 1;
                pthread_cond_broadcast(&md->cond);
                pthread_mutex_unlock(&md->mutex);

                for (i--; i >= 0; i--)
                        pthread_join(md->threads[i], NULL);

                pthread_cond_destroy(&md->cond);
                pthread_mutex_destroy(&md->mutex);
                free(md->cluster);
                free(md->threads);
        }

        return ret;
}
310
311 static void metadump_destroy(struct metadump_struct *md)
312 {
313         int i;
314         pthread_mutex_lock(&md->mutex);
315         md->done = 1;
316         pthread_cond_broadcast(&md->cond);
317         pthread_mutex_unlock(&md->mutex);
318
319         for (i = 0; i < md->num_threads; i++)
320                 pthread_join(md->threads[i], NULL);
321
322         pthread_cond_destroy(&md->cond);
323         pthread_mutex_destroy(&md->mutex);
324         free(md->threads);
325         free(md->cluster);
326 }
327
/*
 * Write @size zero bytes to @out.  Returns the fwrite() item count, i.e.
 * 1 on success.  NOTE(review): callers only ever pass size < BLOCK_SIZE
 * (the tail padding of a cluster); a larger size would overrun the static
 * buffer's extent in a single fwrite item — confirm if new callers appear.
 */
static int write_zero(FILE *out, size_t size)
{
        static char zero[BLOCK_SIZE];
        return fwrite(zero, size, 1, out);
}
333
334 static int write_buffers(struct metadump_struct *md, u64 *next)
335 {
336         struct meta_cluster_header *header = &md->cluster->header;
337         struct meta_cluster_item *item;
338         struct async_work *async;
339         u64 bytenr = 0;
340         u32 nritems = 0;
341         int ret;
342         int err = 0;
343
344         if (list_empty(&md->ordered))
345                 goto out;
346
347         /* wait until all buffers are compressed */
348         while (md->num_items > md->num_ready) {
349                 struct timespec ts = {
350                         .tv_sec = 0,
351                         .tv_nsec = 10000000,
352                 };
353                 pthread_mutex_unlock(&md->mutex);
354                 nanosleep(&ts, NULL);
355                 pthread_mutex_lock(&md->mutex);
356         }
357
358         /* setup and write index block */
359         list_for_each_entry(async, &md->ordered, ordered) {
360                 item = md->cluster->items + nritems;
361                 item->bytenr = cpu_to_le64(async->start);
362                 item->size = cpu_to_le32(async->bufsize);
363                 nritems++;
364         }
365         header->nritems = cpu_to_le32(nritems);
366
367         ret = fwrite(md->cluster, BLOCK_SIZE, 1, md->out);
368         if (ret != 1) {
369                 fprintf(stderr, "Error writing out cluster: %d\n", errno);
370                 return -EIO;
371         }
372
373         /* write buffers */
374         bytenr += le64_to_cpu(header->bytenr) + BLOCK_SIZE;
375         while (!list_empty(&md->ordered)) {
376                 async = list_entry(md->ordered.next, struct async_work,
377                                    ordered);
378                 list_del_init(&async->ordered);
379
380                 bytenr += async->bufsize;
381                 if (!err)
382                         ret = fwrite(async->buffer, async->bufsize, 1,
383                                      md->out);
384                 if (ret != 1) {
385                         err = -EIO;
386                         ret = 0;
387                         fprintf(stderr, "Error writing out cluster: %d\n",
388                                 errno);
389                 }
390
391                 free(async->buffer);
392                 free(async);
393         }
394
395         /* zero unused space in the last block */
396         if (!err && bytenr & BLOCK_MASK) {
397                 size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);
398
399                 bytenr += size;
400                 ret = write_zero(md->out, size);
401                 if (ret != 1) {
402                         fprintf(stderr, "Error zeroing out buffer: %d\n",
403                                 errno);
404                         err = -EIO;
405                 }
406         }
407 out:
408         *next = bytenr;
409         return err;
410 }
411
/*
 * Turn the accumulated pending metadata range into an async_work item:
 * read each tree block in the range, sanitize it via copy_buffer(), and
 * queue it for the compressor threads (or count it ready directly when
 * compression is off).  When the cluster is full, or @done is set, the
 * whole cluster is written out and re-initialized at the next stream
 * offset.
 *
 * Assumes the pending range is a whole number of nodesize blocks —
 * add_metadata() only ever extends it by tree-block extents.
 *
 * Returns 0 on success or a negative errno.
 */
static int flush_pending(struct metadump_struct *md, int done)
{
        struct async_work *async = NULL;
        struct extent_buffer *eb;
        u64 blocksize = md->root->nodesize;
        u64 start;
        u64 size;
        size_t offset;
        int ret = 0;

        if (md->pending_size) {
                async = calloc(1, sizeof(*async));
                if (!async)
                        return -ENOMEM;

                async->start = md->pending_start;
                async->size = md->pending_size;
                async->bufsize = async->size;
                async->buffer = malloc(async->bufsize);
                if (!async->buffer) {
                        free(async);
                        return -ENOMEM;
                }
                /* copy+sanitize each block of the pending range */
                offset = 0;
                start = async->start;
                size = async->size;
                while (size > 0) {
                        eb = read_tree_block(md->root, start, blocksize, 0);
                        if (!eb) {
                                free(async->buffer);
                                free(async);
                                fprintf(stderr,
                                        "Error reading metadata block\n");
                                return -EIO;
                        }
                        copy_buffer(async->buffer + offset, eb);
                        free_extent_buffer(eb);
                        start += blocksize;
                        offset += blocksize;
                        size -= blocksize;
                }

                md->pending_start = (u64)-1;
                md->pending_size = 0;
        } else if (!done) {
                /* nothing pending and not the final flush: no-op */
                return 0;
        }

        pthread_mutex_lock(&md->mutex);
        if (async) {
                list_add_tail(&async->ordered, &md->ordered);
                md->num_items++;
                if (md->compress_level > 0) {
                        /* hand off to a compressor thread */
                        list_add_tail(&async->list, &md->list);
                        pthread_cond_signal(&md->cond);
                } else {
                        md->num_ready++;
                }
        }
        if (md->num_items >= ITEMS_PER_CLUSTER || done) {
                /* write_buffers() always sets 'start' to the next offset */
                ret = write_buffers(md, &start);
                if (ret)
                        fprintf(stderr, "Error writing buffers %d\n",
                                errno);
                else
                        meta_cluster_init(md, start);
        }
        pthread_mutex_unlock(&md->mutex);
        return ret;
}
482
483 static int add_metadata(u64 start, u64 size, struct metadump_struct *md)
484 {
485         int ret;
486         if (md->pending_size + size > MAX_PENDING_SIZE ||
487             md->pending_start + md->pending_size != start) {
488                 ret = flush_pending(md, 0);
489                 if (ret)
490                         return ret;
491                 md->pending_start = start;
492         }
493         readahead_tree_block(md->root, start, size, 0);
494         md->pending_size += size;
495         return 0;
496 }
497
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Decide whether the v0 extent at @bytenr is a tree block by scanning the
 * EXTENT_REF_V0 items that follow it: a ref whose objectid is below
 * BTRFS_FIRST_FREE_OBJECTID belongs to a tree root, so the extent holds
 * metadata.  @path points at the extent item; its slot is advanced and
 * left wherever the scan stops.
 *
 * Returns 1 if it is a tree block, 0 if not, negative errno on error.
 */
static int is_tree_block(struct btrfs_root *extent_root,
                         struct btrfs_path *path, u64 bytenr)
{
        struct extent_buffer *leaf;
        struct btrfs_key key;
        u64 ref_objectid;
        int ret;

        leaf = path->nodes[0];
        while (1) {
                struct btrfs_extent_ref_v0 *ref_item;
                path->slots[0]++;
                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(extent_root, path);
                        if (ret < 0)
                                return ret;
                        if (ret > 0)
                                break;
                        leaf = path->nodes[0];
                }
                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                /* past the refs of this extent: not a tree block */
                if (key.objectid != bytenr)
                        break;
                if (key.type != BTRFS_EXTENT_REF_V0_KEY)
                        continue;
                ref_item = btrfs_item_ptr(leaf, path->slots[0],
                                          struct btrfs_extent_ref_v0);
                ref_objectid = btrfs_ref_objectid_v0(leaf, ref_item);
                /* refs below FIRST_FREE_OBJECTID come from internal trees */
                if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID)
                        return 1;
                break;
        }
        return 0;
}
#endif
534
/*
 * Recursively add every block of a tree log to the metadump.  @eb is the
 * current block; @log_root_tree is 1 while we are still walking the log
 * root tree itself (whose leaves contain ROOT_ITEMs pointing at the
 * per-subvolume log trees) and 0 once we have descended into an actual
 * log tree.
 *
 * Returns 0 on success or a negative errno.
 */
static int copy_log_blocks(struct btrfs_root *root, struct extent_buffer *eb,
                           struct metadump_struct *metadump,
                           int log_root_tree)
{
        struct extent_buffer *tmp;
        struct btrfs_root_item *ri;
        struct btrfs_key key;
        u64 bytenr;
        int level;
        int nritems = 0;
        int i = 0;
        int ret;

        /* the current block itself is always part of the dump */
        ret = add_metadata(btrfs_header_bytenr(eb), root->leafsize, metadump);
        if (ret) {
                fprintf(stderr, "Error adding metadata block\n");
                return ret;
        }

        /* leaves of a log tree have nothing further to descend into */
        if (btrfs_header_level(eb) == 0 && !log_root_tree)
                return 0;

        level = btrfs_header_level(eb);
        nritems = btrfs_header_nritems(eb);
        for (i = 0; i < nritems; i++) {
                if (level == 0) {
                        /* log root tree leaf: follow each ROOT_ITEM */
                        btrfs_item_key_to_cpu(eb, &key, i);
                        if (key.type != BTRFS_ROOT_ITEM_KEY)
                                continue;
                        ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
                        bytenr = btrfs_disk_root_bytenr(eb, ri);
                        tmp = read_tree_block(root, bytenr, root->leafsize, 0);
                        if (!tmp) {
                                fprintf(stderr,
                                        "Error reading log root block\n");
                                return -EIO;
                        }
                        /* descend into the log tree proper */
                        ret = copy_log_blocks(root, tmp, metadump, 0);
                        free_extent_buffer(tmp);
                        if (ret)
                                return ret;
                } else {
                        /* interior node: recurse into each child */
                        bytenr = btrfs_node_blockptr(eb, i);
                        tmp = read_tree_block(root, bytenr, root->leafsize, 0);
                        if (!tmp) {
                                fprintf(stderr, "Error reading log block\n");
                                return -EIO;
                        }
                        ret = copy_log_blocks(root, tmp, metadump,
                                              log_root_tree);
                        free_extent_buffer(tmp);
                        if (ret)
                                return ret;
                }
        }

        return 0;
}
593
594 static int copy_log_trees(struct btrfs_root *root,
595                           struct metadump_struct *metadump,
596                           struct btrfs_path *path)
597 {
598         u64 blocknr = btrfs_super_log_root(root->fs_info->super_copy);
599
600         if (blocknr == 0)
601                 return 0;
602
603         if (!root->fs_info->log_root_tree ||
604             !root->fs_info->log_root_tree->node) {
605                 fprintf(stderr, "Error copying tree log, it wasn't setup\n");
606                 return -EIO;
607         }
608
609         return copy_log_blocks(root, root->fs_info->log_root_tree->node,
610                                metadump, 1);
611 }
612
613 static int create_metadump(const char *input, FILE *out, int num_threads,
614                            int compress_level)
615 {
616         struct btrfs_root *root;
617         struct btrfs_root *extent_root;
618         struct btrfs_path *path = NULL;
619         struct extent_buffer *leaf;
620         struct btrfs_extent_item *ei;
621         struct btrfs_key key;
622         struct metadump_struct metadump;
623         u64 bytenr;
624         u64 num_bytes;
625         int ret;
626         int err = 0;
627
628         root = open_ctree(input, 0, 0);
629         if (!root) {
630                 fprintf(stderr, "Open ctree failed\n");
631                 return -EIO;
632         }
633
634         BUG_ON(root->nodesize != root->leafsize);
635
636         ret = metadump_init(&metadump, root, out, num_threads,
637                             compress_level);
638         if (ret) {
639                 fprintf(stderr, "Error initing metadump %d\n", ret);
640                 close_ctree(root);
641                 return ret;
642         }
643
644         ret = add_metadata(BTRFS_SUPER_INFO_OFFSET, 4096, &metadump);
645         if (ret) {
646                 fprintf(stderr, "Error adding metadata %d\n", ret);
647                 err = ret;
648                 goto out;
649         }
650
651         extent_root = root->fs_info->extent_root;
652         path = btrfs_alloc_path();
653         if (!path) {
654                 fprintf(stderr, "Out of memory allocing path\n");
655                 err = -ENOMEM;
656                 goto out;
657         }
658         bytenr = BTRFS_SUPER_INFO_OFFSET + 4096;
659         key.objectid = bytenr;
660         key.type = BTRFS_EXTENT_ITEM_KEY;
661         key.offset = 0;
662
663         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
664         if (ret < 0) {
665                 fprintf(stderr, "Error searching extent root %d\n", ret);
666                 err = ret;
667                 goto out;
668         }
669
670         while (1) {
671                 leaf = path->nodes[0];
672                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
673                         ret = btrfs_next_leaf(extent_root, path);
674                         if (ret < 0) {
675                                 fprintf(stderr, "Error going to next leaf %d"
676                                         "\n", ret);
677                                 err = ret;
678                                 goto out;
679                         }
680                         if (ret > 0)
681                                 break;
682                         leaf = path->nodes[0];
683                 }
684
685                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
686                 if (key.objectid < bytenr ||
687                     key.type != BTRFS_EXTENT_ITEM_KEY) {
688                         path->slots[0]++;
689                         continue;
690                 }
691
692                 bytenr = key.objectid;
693                 num_bytes = key.offset;
694
695                 if (btrfs_item_size_nr(leaf, path->slots[0]) > sizeof(*ei)) {
696                         ei = btrfs_item_ptr(leaf, path->slots[0],
697                                             struct btrfs_extent_item);
698                         if (btrfs_extent_flags(leaf, ei) &
699                             BTRFS_EXTENT_FLAG_TREE_BLOCK) {
700                                 ret = add_metadata(bytenr, num_bytes,
701                                                    &metadump);
702                                 if (ret) {
703                                         fprintf(stderr, "Error adding block "
704                                                 "%d\n", ret);
705                                         err = ret;
706                                         goto out;
707                                 }
708                         }
709                 } else {
710 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
711                         ret = is_tree_block(extent_root, path, bytenr);
712                         if (ret < 0) {
713                                 fprintf(stderr, "Error checking tree block "
714                                         "%d\n", ret);
715                                 err = ret;
716                                 goto out;
717                         }
718
719                         if (ret) {
720                                 ret = add_metadata(bytenr, num_bytes,
721                                                    &metadump);
722                                 if (ret) {
723                                         fprintf(stderr, "Error adding block "
724                                                 "%d\n", ret);
725                                         err = ret;
726                                         goto out;
727                                 }
728                         }
729 #else
730                         fprintf(stderr, "Either extent tree corruption or "
731                                 "you haven't built with V0 support\n");
732                         err = -EIO;
733                         goto out;
734 #endif
735                 }
736                 bytenr += num_bytes;
737         }
738
739         ret = copy_log_trees(root, &metadump, path);
740         if (ret)
741                 err = ret;
742 out:
743         ret = flush_pending(&metadump, 1);
744         if (ret) {
745                 if (!err)
746                         ret = err;
747                 fprintf(stderr, "Error flushing pending %d\n", ret);
748         }
749
750         metadump_destroy(&metadump);
751
752         btrfs_free_path(path);
753         ret = close_ctree(root);
754         return err ? err : ret;
755 }
756
/*
 * Rewrite the restored super block so the image mounts as a metadump:
 * set BTRFS_SUPER_FLAG_METADUMP and replace the sys_chunk_array with a
 * single SYSTEM chunk mapping the whole device 1:1 (length (u64)-1,
 * one stripe at offset 0 on the original devid), then recompute the
 * checksum.  @buffer is the raw 4k super block image.
 */
static void update_super(u8 *buffer)
{
        struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
        struct btrfs_chunk *chunk;
        struct btrfs_disk_key *key;
        u32 sectorsize = btrfs_super_sectorsize(super);
        u64 flags = btrfs_super_flags(super);

        flags |= BTRFS_SUPER_FLAG_METADUMP;
        btrfs_set_super_flags(super, flags);

        /* the array holds one disk_key immediately followed by one chunk */
        key = (struct btrfs_disk_key *)(super->sys_chunk_array);
        chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
                                       sizeof(struct btrfs_disk_key));

        btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
        btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
        btrfs_set_disk_key_offset(key, 0);

        /* one identity chunk covering the whole device */
        btrfs_set_stack_chunk_length(chunk, (u64)-1);
        btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
        btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024);
        btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
        btrfs_set_stack_chunk_io_align(chunk, sectorsize);
        btrfs_set_stack_chunk_io_width(chunk, sectorsize);
        btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
        btrfs_set_stack_chunk_num_stripes(chunk, 1);
        btrfs_set_stack_chunk_sub_stripes(chunk, 0);
        /* dev_item.devid is already little-endian on disk; copy verbatim */
        chunk->stripe.devid = super->dev_item.devid;
        chunk->stripe.offset = cpu_to_le64(0);
        memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
        btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
        csum_block(buffer, 4096);
}
791
/*
 * Worker thread for image restore: pulls async_work items off
 * mdres->list, decompresses them when the image is zlib-compressed,
 * patches the super block copy via update_super(), and pwrites each chunk
 * to the output at its original filesystem offset.  The first error seen
 * by any worker is latched into mdres->error.
 *
 * Fixes over the previous version:
 *  - "if (ret < size)" compared a signed int against a size_t, so a
 *    negative pwrite64() return converted to a huge unsigned value and
 *    write errors were silently missed; compare signedness-correctly;
 *  - uncompress() was given (unsigned long *)&size where size is size_t,
 *    which is broken where sizeof(size_t) != sizeof(unsigned long); use a
 *    local unsigned long instead.
 */
static void *restore_worker(void *data)
{
        struct mdrestore_struct *mdres = (struct mdrestore_struct *)data;
        struct async_work *async;
        size_t size;
        u8 *buffer;
        u8 *outbuf;
        int outfd;
        int ret;

        outfd = fileno(mdres->out);
        buffer = malloc(MAX_PENDING_SIZE * 2);
        if (!buffer) {
                fprintf(stderr, "Error allocing buffer\n");
                pthread_mutex_lock(&mdres->mutex);
                if (!mdres->error)
                        mdres->error = -ENOMEM;
                pthread_mutex_unlock(&mdres->mutex);
                goto out;
        }

        while (1) {
                int err = 0;

                pthread_mutex_lock(&mdres->mutex);
                while (list_empty(&mdres->list)) {
                        if (mdres->done) {
                                pthread_mutex_unlock(&mdres->mutex);
                                goto out;
                        }
                        pthread_cond_wait(&mdres->cond, &mdres->mutex);
                }
                async = list_entry(mdres->list.next, struct async_work, list);
                list_del_init(&async->list);
                pthread_mutex_unlock(&mdres->mutex);

                if (mdres->compress_method == COMPRESS_ZLIB) {
                        unsigned long destlen = MAX_PENDING_SIZE * 2;

                        ret = uncompress(buffer, &destlen,
                                         async->buffer, async->bufsize);
                        if (ret != Z_OK) {
                                fprintf(stderr, "Error decompressing %d\n",
                                        ret);
                                err = -EIO;
                        }
                        outbuf = buffer;
                        size = destlen;
                } else {
                        outbuf = async->buffer;
                        size = async->bufsize;
                }

                /* the super block must be rewritten for metadump mounts */
                if (async->start == BTRFS_SUPER_INFO_OFFSET)
                        update_super(outbuf);

                ret = pwrite64(outfd, outbuf, size, async->start);
                if (ret < 0) {
                        fprintf(stderr, "Error writing to device %d\n",
                                errno);
                        err = errno;
                } else if ((size_t)ret < size) {
                        fprintf(stderr, "Short write\n");
                        err = -EIO;
                }

                pthread_mutex_lock(&mdres->mutex);
                if (err && !mdres->error)
                        mdres->error = err;
                mdres->num_items--;
                pthread_mutex_unlock(&mdres->mutex);

                free(async->buffer);
                free(async);
        }
out:
        free(buffer);
        pthread_exit(NULL);
}
871
872 static void mdrestore_destroy(struct mdrestore_struct *mdres)
873 {
874         int i;
875         pthread_mutex_lock(&mdres->mutex);
876         mdres->done = 1;
877         pthread_cond_broadcast(&mdres->cond);
878         pthread_mutex_unlock(&mdres->mutex);
879
880         for (i = 0; i < mdres->num_threads; i++)
881                 pthread_join(mdres->threads[i], NULL);
882
883         pthread_cond_destroy(&mdres->cond);
884         pthread_mutex_destroy(&mdres->mutex);
885         free(mdres->threads);
886 }
887
888 static int mdrestore_init(struct mdrestore_struct *mdres,
889                           FILE *in, FILE *out, int num_threads)
890 {
891         int i, ret = 0;
892
893         memset(mdres, 0, sizeof(*mdres));
894         pthread_cond_init(&mdres->cond, NULL);
895         pthread_mutex_init(&mdres->mutex, NULL);
896         INIT_LIST_HEAD(&mdres->list);
897         mdres->in = in;
898         mdres->out = out;
899
900         if (!num_threads)
901                 return 0;
902
903         mdres->num_threads = num_threads;
904         mdres->threads = calloc(num_threads, sizeof(pthread_t));
905         if (!mdres->threads)
906                 return -ENOMEM;
907         for (i = 0; i < num_threads; i++) {
908                 ret = pthread_create(mdres->threads + i, NULL, restore_worker,
909                                      mdres);
910                 if (ret)
911                         break;
912         }
913         if (ret)
914                 mdrestore_destroy(mdres);
915         return ret;
916 }
917
918 static int add_cluster(struct meta_cluster *cluster,
919                        struct mdrestore_struct *mdres, u64 *next)
920 {
921         struct meta_cluster_item *item;
922         struct meta_cluster_header *header = &cluster->header;
923         struct async_work *async;
924         u64 bytenr;
925         u32 i, nritems;
926         int ret;
927
928         BUG_ON(mdres->num_items);
929         mdres->compress_method = header->compress;
930
931         bytenr = le64_to_cpu(header->bytenr) + BLOCK_SIZE;
932         nritems = le32_to_cpu(header->nritems);
933         for (i = 0; i < nritems; i++) {
934                 item = &cluster->items[i];
935                 async = calloc(1, sizeof(*async));
936                 if (!async) {
937                         fprintf(stderr, "Error allocating async\n");
938                         return -ENOMEM;
939                 }
940                 async->start = le64_to_cpu(item->bytenr);
941                 async->bufsize = le32_to_cpu(item->size);
942                 async->buffer = malloc(async->bufsize);
943                 if (!async->buffer) {
944                         fprintf(stderr, "Error allocing async buffer\n");
945                         free(async);
946                         return -ENOMEM;
947                 }
948                 ret = fread(async->buffer, async->bufsize, 1, mdres->in);
949                 if (ret != 1) {
950                         fprintf(stderr, "Error reading buffer %d\n", errno);
951                         free(async->buffer);
952                         free(async);
953                         return -EIO;
954                 }
955                 bytenr += async->bufsize;
956
957                 pthread_mutex_lock(&mdres->mutex);
958                 list_add_tail(&async->list, &mdres->list);
959                 mdres->num_items++;
960                 pthread_cond_signal(&mdres->cond);
961                 pthread_mutex_unlock(&mdres->mutex);
962         }
963         if (bytenr & BLOCK_MASK) {
964                 char buffer[BLOCK_MASK];
965                 size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);
966
967                 bytenr += size;
968                 ret = fread(buffer, size, 1, mdres->in);
969                 if (ret != 1) {
970                         fprintf(stderr, "Error reading in buffer %d\n", errno);
971                         return -EIO;
972                 }
973         }
974         *next = bytenr;
975         return 0;
976 }
977
978 static int wait_for_worker(struct mdrestore_struct *mdres)
979 {
980         int ret = 0;
981
982         pthread_mutex_lock(&mdres->mutex);
983         ret = mdres->error;
984         while (!ret && mdres->num_items > 0) {
985                 struct timespec ts = {
986                         .tv_sec = 0,
987                         .tv_nsec = 10000000,
988                 };
989                 pthread_mutex_unlock(&mdres->mutex);
990                 nanosleep(&ts, NULL);
991                 pthread_mutex_lock(&mdres->mutex);
992                 ret = mdres->error;
993         }
994         pthread_mutex_unlock(&mdres->mutex);
995         return ret;
996 }
997
998 static int restore_metadump(const char *input, FILE *out, int num_threads)
999 {
1000         struct meta_cluster *cluster = NULL;
1001         struct meta_cluster_header *header;
1002         struct mdrestore_struct mdrestore;
1003         u64 bytenr = 0;
1004         FILE *in = NULL;
1005         int ret = 0;
1006
1007         if (!strcmp(input, "-")) {
1008                 in = stdin;
1009         } else {
1010                 in = fopen(input, "r");
1011                 if (!in) {
1012                         perror("unable to open metadump image");
1013                         return 1;
1014                 }
1015         }
1016
1017         cluster = malloc(BLOCK_SIZE);
1018         if (!cluster) {
1019                 fprintf(stderr, "Error allocating cluster\n");
1020                 if (in != stdin)
1021                         fclose(in);
1022                 return -ENOMEM;
1023         }
1024
1025         ret = mdrestore_init(&mdrestore, in, out, num_threads);
1026         if (ret) {
1027                 fprintf(stderr, "Error initing mdrestore %d\n", ret);
1028                 if (in != stdin)
1029                         fclose(in);
1030                 free(cluster);
1031                 return ret;
1032         }
1033
1034         while (1) {
1035                 ret = fread(cluster, BLOCK_SIZE, 1, in);
1036                 if (!ret)
1037                         break;
1038
1039                 header = &cluster->header;
1040                 if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
1041                     le64_to_cpu(header->bytenr) != bytenr) {
1042                         fprintf(stderr, "bad header in metadump image\n");
1043                         ret = -EIO;
1044                         break;
1045                 }
1046                 ret = add_cluster(cluster, &mdrestore, &bytenr);
1047                 if (ret) {
1048                         fprintf(stderr, "Error adding cluster\n");
1049                         break;
1050                 }
1051
1052                 ret = wait_for_worker(&mdrestore);
1053                 if (ret) {
1054                         fprintf(stderr, "One of the threads errored out %d\n",
1055                                 ret);
1056                         break;
1057                 }
1058         }
1059
1060         mdrestore_destroy(&mdrestore);
1061         free(cluster);
1062         if (in != stdin)
1063                 fclose(in);
1064         return ret;
1065 }
1066
/*
 * Print the command-line synopsis to stderr and terminate with exit
 * status 1.  Never returns.
 */
static void print_usage(void)
{
	fputs("usage: btrfs-image [options] source target\n"
	      "\t-r      \trestore metadump image\n"
	      "\t-c value\tcompression level (0 ~ 9)\n"
	      "\t-t value\tnumber of threads (1 ~ 32)\n", stderr);
	exit(1);
}
1075
1076 int main(int argc, char *argv[])
1077 {
1078         char *source;
1079         char *target;
1080         int num_threads = 0;
1081         int compress_level = 0;
1082         int create = 1;
1083         int ret;
1084         FILE *out;
1085
1086         while (1) {
1087                 int c = getopt(argc, argv, "rc:t:");
1088                 if (c < 0)
1089                         break;
1090                 switch (c) {
1091                 case 'r':
1092                         create = 0;
1093                         break;
1094                 case 't':
1095                         num_threads = atoi(optarg);
1096                         if (num_threads <= 0 || num_threads > 32)
1097                                 print_usage();
1098                         break;
1099                 case 'c':
1100                         compress_level = atoi(optarg);
1101                         if (compress_level < 0 || compress_level > 9)
1102                                 print_usage();
1103                         break;
1104                 default:
1105                         print_usage();
1106                 }
1107         }
1108
1109         argc = argc - optind;
1110         if (argc != 2)
1111                 print_usage();
1112         source = argv[optind];
1113         target = argv[optind + 1];
1114
1115         if (create && !strcmp(target, "-")) {
1116                 out = stdout;
1117         } else {
1118                 out = fopen(target, "w+");
1119                 if (!out) {
1120                         perror("unable to create target file");
1121                         exit(1);
1122                 }
1123         }
1124
1125         if (num_threads == 0 && compress_level > 0) {
1126                 num_threads = sysconf(_SC_NPROCESSORS_ONLN);
1127                 if (num_threads <= 0)
1128                         num_threads = 1;
1129         }
1130
1131         if (create)
1132                 ret = create_metadump(source, out, num_threads,
1133                                       compress_level);
1134         else
1135                 ret = restore_metadump(source, out, 1);
1136
1137         if (out == stdout)
1138                 fflush(out);
1139         else
1140                 fclose(out);
1141
1142         return ret;
1143 }