/*
 * Copyright (C) 2008 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#define _XOPEN_SOURCE 500
#define _GNU_SOURCE 1
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <dirent.h>
#include <zlib.h>
#include "kerncompat.h"
#include "crc32c.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "utils.h"
#include "version.h"


#define HEADER_MAGIC            0xbd5c25e27295668bULL
#define MAX_PENDING_SIZE        (256 * 1024)
#define BLOCK_SIZE              1024
#define BLOCK_MASK              (BLOCK_SIZE - 1)

#define COMPRESS_NONE           0
#define COMPRESS_ZLIB           1

struct meta_cluster_item {
        __le64 bytenr;
        __le32 size;
} __attribute__ ((__packed__));

struct meta_cluster_header {
        __le64 magic;
        __le64 bytenr;
        __le32 nritems;
        u8 compress;
} __attribute__ ((__packed__));

/* cluster header + index items + buffers */
struct meta_cluster {
        struct meta_cluster_header header;
        struct meta_cluster_item items[];
} __attribute__ ((__packed__));

#define ITEMS_PER_CLUSTER ((BLOCK_SIZE - sizeof(struct meta_cluster)) / \
                           sizeof(struct meta_cluster_item))

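/* one queued unit of work: a metadata buffer and where it lives on disk */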
struct async_work {
        struct list_head list;
        struct list_head ordered;
        u64 start;
        u64 size;
        u8 *buffer;
        size_t bufsize;
        int error;
};

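/*
 * State for creating an image: the compression worker threads, the cluster
 * currently being filled and the pending run of contiguous metadata.
 */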
struct metadump_struct {
        struct btrfs_root *root;
        FILE *out;

        struct meta_cluster *cluster;

        pthread_t *threads;
        size_t num_threads;
        pthread_mutex_t mutex;
        pthread_cond_t cond;

        struct list_head list;
        struct list_head ordered;
        size_t num_items;
        size_t num_ready;

        u64 pending_start;
        u64 pending_size;

        int compress_level;
        int done;
};

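/*
 * State for restoring an image: the worker threads and the list of buffers
 * waiting to be decompressed and written out.
 */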
struct mdrestore_struct {
        FILE *in;
        FILE *out;

        pthread_t *threads;
        size_t num_threads;
        pthread_mutex_t mutex;
        pthread_cond_t cond;

        struct list_head list;
        size_t num_items;

        int compress_method;
        int done;
        int error;
};

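/*
 * Recompute the crc32c of a metadata block and store the result in the
 * checksum area at the start of the buffer.
 */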
static void csum_block(u8 *buf, size_t len)
{
        char result[BTRFS_CRC32_SIZE];
        u32 crc = ~(u32)0;
        crc = crc32c(crc, buf + BTRFS_CSUM_SIZE, len - BTRFS_CSUM_SIZE);
        btrfs_csum_final(crc, result);
        memcpy(buf, result, BTRFS_CRC32_SIZE);
}

/*
 * zero inline extents and csum items
 */
static void zero_items(u8 *dst, struct extent_buffer *src)
{
        struct btrfs_file_extent_item *fi;
        struct btrfs_item *item;
        struct btrfs_key key;
        u32 nritems = btrfs_header_nritems(src);
        size_t size;
        unsigned long ptr;
        int i, extent_type;

        for (i = 0; i < nritems; i++) {
                item = btrfs_item_nr(src, i);
                btrfs_item_key_to_cpu(src, &key, i);
                if (key.type == BTRFS_CSUM_ITEM_KEY) {
                        size = btrfs_item_size_nr(src, i);
                        memset(dst + btrfs_leaf_data(src) +
                               btrfs_item_offset_nr(src, i), 0, size);
                        continue;
                }
                if (key.type != BTRFS_EXTENT_DATA_KEY)
                        continue;

                fi = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
                extent_type = btrfs_file_extent_type(src, fi);
                if (extent_type != BTRFS_FILE_EXTENT_INLINE)
                        continue;

                ptr = btrfs_file_extent_inline_start(fi);
                size = btrfs_file_extent_inline_item_len(src, item);
                memset(dst + ptr, 0, size);
        }
}

/*
 * copy buffer and zero useless data in the buffer
 */
static void copy_buffer(u8 *dst, struct extent_buffer *src)
{
        int level;
        size_t size;
        u32 nritems;

        memcpy(dst, src->data, src->len);
        if (src->start == BTRFS_SUPER_INFO_OFFSET)
                return;

        level = btrfs_header_level(src);
        nritems = btrfs_header_nritems(src);

        if (nritems == 0) {
                size = sizeof(struct btrfs_header);
                memset(dst + size, 0, src->len - size);
        } else if (level == 0) {
                size = btrfs_leaf_data(src) +
                        btrfs_item_offset_nr(src, nritems - 1) -
                        btrfs_item_nr_offset(nritems);
                memset(dst + btrfs_item_nr_offset(nritems), 0, size);
                zero_items(dst, src);
        } else {
                size = offsetof(struct btrfs_node, ptrs) +
                        sizeof(struct btrfs_key_ptr) * nritems;
                memset(dst + size, 0, src->len - size);
        }
        csum_block(dst, src->len);
}

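/*
 * Compression worker: take queued buffers off md->list, replace each one
 * with its zlib-compressed version and mark it ready for write_buffers().
 */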
static void *dump_worker(void *data)
{
        struct metadump_struct *md = (struct metadump_struct *)data;
        struct async_work *async;
        int ret;

        while (1) {
                pthread_mutex_lock(&md->mutex);
                while (list_empty(&md->list)) {
                        if (md->done) {
                                pthread_mutex_unlock(&md->mutex);
                                goto out;
                        }
                        pthread_cond_wait(&md->cond, &md->mutex);
                }
                async = list_entry(md->list.next, struct async_work, list);
                list_del_init(&async->list);
                pthread_mutex_unlock(&md->mutex);

                if (md->compress_level > 0) {
                        u8 *orig = async->buffer;

                        async->bufsize = compressBound(async->size);
                        async->buffer = malloc(async->bufsize);
                        if (!async->buffer) {
                                /* fall back to the uncompressed data */
                                async->buffer = orig;
                                async->bufsize = async->size;
                                async->error = 1;
                        } else {
                                ret = compress2(async->buffer,
                                                (unsigned long *)&async->bufsize,
                                                orig, async->size,
                                                md->compress_level);
                                if (ret != Z_OK)
                                        async->error = 1;
                                free(orig);
                        }
                }

                pthread_mutex_lock(&md->mutex);
                md->num_ready++;
                pthread_mutex_unlock(&md->mutex);
        }
out:
        pthread_exit(NULL);
}

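/* reset cluster state and header for a new cluster starting at @start */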
static void meta_cluster_init(struct metadump_struct *md, u64 start)
{
        struct meta_cluster_header *header;

        md->num_items = 0;
        md->num_ready = 0;
        header = &md->cluster->header;
        header->magic = cpu_to_le64(HEADER_MAGIC);
        header->bytenr = cpu_to_le64(start);
        header->nritems = cpu_to_le32(0);
        header->compress = md->compress_level > 0 ?
                           COMPRESS_ZLIB : COMPRESS_NONE;
}

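/*
 * Set up the dump state: allocate the cluster block and, if worker threads
 * were requested, start them for compression.
 */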
static int metadump_init(struct metadump_struct *md, struct btrfs_root *root,
                         FILE *out, int num_threads, int compress_level)
{
        int i, ret;

        memset(md, 0, sizeof(*md));
        pthread_cond_init(&md->cond, NULL);
        pthread_mutex_init(&md->mutex, NULL);
        INIT_LIST_HEAD(&md->list);
        INIT_LIST_HEAD(&md->ordered);
        md->root = root;
        md->out = out;
        md->pending_start = (u64)-1;
        md->compress_level = compress_level;
        md->cluster = calloc(1, BLOCK_SIZE);
        if (!md->cluster) {
                pthread_cond_destroy(&md->cond);
                pthread_mutex_destroy(&md->mutex);
                return -ENOMEM;
        }

        meta_cluster_init(md, 0);
        if (!num_threads)
                return 0;

        md->num_threads = num_threads;
        md->threads = calloc(num_threads, sizeof(pthread_t));
        if (!md->threads) {
                free(md->cluster);
                pthread_cond_destroy(&md->cond);
                pthread_mutex_destroy(&md->mutex);
                return -ENOMEM;
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_create(md->threads + i, NULL, dump_worker, md);
                if (ret)
                        break;
        }

        if (ret) {
                pthread_mutex_lock(&md->mutex);
                md->done = 1;
                pthread_cond_broadcast(&md->cond);
                pthread_mutex_unlock(&md->mutex);

                for (i--; i >= 0; i--)
                        pthread_join(md->threads[i], NULL);

                pthread_cond_destroy(&md->cond);
                pthread_mutex_destroy(&md->mutex);
                free(md->cluster);
                free(md->threads);
        }

        return ret;
}

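/* stop the workers, wait for them to exit and free the dump context */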
static void metadump_destroy(struct metadump_struct *md)
{
        int i;
        pthread_mutex_lock(&md->mutex);
        md->done = 1;
        pthread_cond_broadcast(&md->cond);
        pthread_mutex_unlock(&md->mutex);

        for (i = 0; i < md->num_threads; i++)
                pthread_join(md->threads[i], NULL);

        pthread_cond_destroy(&md->cond);
        pthread_mutex_destroy(&md->mutex);
        free(md->threads);
        free(md->cluster);
}

static int write_zero(FILE *out, size_t size)
{
        static char zero[BLOCK_SIZE];
        return fwrite(zero, size, 1, out);
}

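/*
 * Wait until every queued buffer in this cluster is ready, write the cluster
 * index block followed by the buffers themselves, and pad the output up to a
 * BLOCK_SIZE boundary.  Called with md->mutex held.
 */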
static int write_buffers(struct metadump_struct *md, u64 *next)
{
        struct meta_cluster_header *header = &md->cluster->header;
        struct meta_cluster_item *item;
        struct async_work *async;
        u64 bytenr = 0;
        u32 nritems = 0;
        int ret;
        int err = 0;

        if (list_empty(&md->ordered))
                goto out;

        /* wait until all buffers are compressed */
        while (md->num_items > md->num_ready) {
                struct timespec ts = {
                        .tv_sec = 0,
                        .tv_nsec = 10000000,
                };
                pthread_mutex_unlock(&md->mutex);
                nanosleep(&ts, NULL);
                pthread_mutex_lock(&md->mutex);
        }

        /* setup and write index block */
        list_for_each_entry(async, &md->ordered, ordered) {
                item = md->cluster->items + nritems;
                item->bytenr = cpu_to_le64(async->start);
                item->size = cpu_to_le32(async->bufsize);
                nritems++;
        }
        header->nritems = cpu_to_le32(nritems);

        ret = fwrite(md->cluster, BLOCK_SIZE, 1, md->out);
        if (ret != 1) {
                fprintf(stderr, "Error writing out cluster: %d\n", errno);
                return -EIO;
        }

        /* write buffers */
        bytenr += le64_to_cpu(header->bytenr) + BLOCK_SIZE;
        while (!list_empty(&md->ordered)) {
                async = list_entry(md->ordered.next, struct async_work,
                                   ordered);
                list_del_init(&async->ordered);

                bytenr += async->bufsize;
                if (!err)
                        ret = fwrite(async->buffer, async->bufsize, 1,
                                     md->out);
                if (ret != 1) {
                        err = -EIO;
                        ret = 0;
                        fprintf(stderr, "Error writing out cluster: %d\n",
                                errno);
                }

                free(async->buffer);
                free(async);
        }

        /* zero unused space in the last block */
        if (!err && bytenr & BLOCK_MASK) {
                size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);

                bytenr += size;
                ret = write_zero(md->out, size);
                if (ret != 1) {
                        fprintf(stderr, "Error zeroing out buffer: %d\n",
                                errno);
                        err = -EIO;
                }
        }
out:
        *next = bytenr;
        return err;
}

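/*
 * Turn the pending run of contiguous metadata into an async_work item,
 * reading each tree block and zeroing data that should not end up in the
 * image, then queue it.  Once a full cluster has accumulated, or @done is
 * set, write the cluster out.
 */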
static int flush_pending(struct metadump_struct *md, int done)
{
        struct async_work *async = NULL;
        struct extent_buffer *eb;
        u64 blocksize = md->root->nodesize;
        u64 start;
        u64 size;
        size_t offset;
        int ret = 0;

        if (md->pending_size) {
                async = calloc(1, sizeof(*async));
                if (!async)
                        return -ENOMEM;

                async->start = md->pending_start;
                async->size = md->pending_size;
                async->bufsize = async->size;
                async->buffer = malloc(async->bufsize);
                if (!async->buffer) {
                        free(async);
                        return -ENOMEM;
                }
                offset = 0;
                start = async->start;
                size = async->size;
                while (size > 0) {
                        eb = read_tree_block(md->root, start, blocksize, 0);
                        if (!eb) {
                                free(async->buffer);
                                free(async);
                                fprintf(stderr,
                                        "Error reading metadata block\n");
                                return -EIO;
                        }
                        copy_buffer(async->buffer + offset, eb);
                        free_extent_buffer(eb);
                        start += blocksize;
                        offset += blocksize;
                        size -= blocksize;
                }

                md->pending_start = (u64)-1;
                md->pending_size = 0;
        } else if (!done) {
                return 0;
        }

        pthread_mutex_lock(&md->mutex);
        if (async) {
                list_add_tail(&async->ordered, &md->ordered);
                md->num_items++;
                if (md->compress_level > 0) {
                        list_add_tail(&async->list, &md->list);
                        pthread_cond_signal(&md->cond);
                } else {
                        md->num_ready++;
                }
        }
        if (md->num_items >= ITEMS_PER_CLUSTER || done) {
                ret = write_buffers(md, &start);
                if (ret)
                        fprintf(stderr, "Error writing buffers %d\n",
                                errno);
                else
                        meta_cluster_init(md, start);
        }
        pthread_mutex_unlock(&md->mutex);
        return ret;
}

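/*
 * Queue a metadata extent for dumping.  Extents are merged into the pending
 * run as long as they are contiguous and the run stays below
 * MAX_PENDING_SIZE; otherwise the pending run is flushed first.
 */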
static int add_metadata(u64 start, u64 size, struct metadump_struct *md)
{
        int ret;
        if (md->pending_size + size > MAX_PENDING_SIZE ||
            md->pending_start + md->pending_size != start) {
                ret = flush_pending(md, 0);
                if (ret)
                        return ret;
                md->pending_start = start;
        }
        readahead_tree_block(md->root, start, size, 0);
        md->pending_size += size;
        return 0;
}

#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
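/*
 * With the v0 extent format the extent item does not say whether an extent
 * is a tree block, so walk its backrefs: a reference owned by a tree root
 * (objectid below BTRFS_FIRST_FREE_OBJECTID) means it is metadata.
 */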
static int is_tree_block(struct btrfs_root *extent_root,
                         struct btrfs_path *path, u64 bytenr)
{
        struct extent_buffer *leaf;
        struct btrfs_key key;
        u64 ref_objectid;
        int ret;

        leaf = path->nodes[0];
        while (1) {
                struct btrfs_extent_ref_v0 *ref_item;
                path->slots[0]++;
                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(extent_root, path);
                        if (ret < 0)
                                return ret;
                        if (ret > 0)
                                break;
                        leaf = path->nodes[0];
                }
                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                if (key.objectid != bytenr)
                        break;
                if (key.type != BTRFS_EXTENT_REF_V0_KEY)
                        continue;
                ref_item = btrfs_item_ptr(leaf, path->slots[0],
                                          struct btrfs_extent_ref_v0);
                ref_objectid = btrfs_ref_objectid_v0(leaf, ref_item);
                if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID)
                        return 1;
                break;
        }
        return 0;
}
#endif

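/*
 * Open the filesystem at @input, walk the extent tree and dump the
 * superblock plus every tree block into the metadump image written to @out.
 */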
static int create_metadump(const char *input, FILE *out, int num_threads,
                           int compress_level)
{
        struct btrfs_root *root;
        struct btrfs_root *extent_root;
        struct btrfs_path *path = NULL;
        struct extent_buffer *leaf;
        struct btrfs_extent_item *ei;
        struct btrfs_key key;
        struct metadump_struct metadump;
        u64 bytenr;
        u64 num_bytes;
        int ret;
        int err = 0;

        root = open_ctree(input, 0, 0);
        if (!root) {
                fprintf(stderr, "Open ctree failed\n");
                return -EIO;
        }

        BUG_ON(root->nodesize != root->leafsize);

        ret = metadump_init(&metadump, root, out, num_threads,
                            compress_level);
        if (ret) {
                fprintf(stderr, "Error initializing metadump %d\n", ret);
                close_ctree(root);
                return ret;
        }

        ret = add_metadata(BTRFS_SUPER_INFO_OFFSET, 4096, &metadump);
        if (ret) {
                fprintf(stderr, "Error adding metadata %d\n", ret);
                err = ret;
                goto out;
        }

        extent_root = root->fs_info->extent_root;
        path = btrfs_alloc_path();
        if (!path) {
                fprintf(stderr, "Out of memory allocating path\n");
                err = -ENOMEM;
                goto out;
        }
        bytenr = BTRFS_SUPER_INFO_OFFSET + 4096;
        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = 0;

        ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
        if (ret < 0) {
                fprintf(stderr, "Error searching extent root %d\n", ret);
                err = ret;
                goto out;
        }

        while (1) {
                leaf = path->nodes[0];
                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(extent_root, path);
                        if (ret < 0) {
                                fprintf(stderr,
                                        "Error going to next leaf %d\n", ret);
                                err = ret;
                                goto out;
                        }
                        if (ret > 0)
                                break;
                        leaf = path->nodes[0];
                }

                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                if (key.objectid < bytenr ||
                    key.type != BTRFS_EXTENT_ITEM_KEY) {
                        path->slots[0]++;
                        continue;
                }

                bytenr = key.objectid;
                num_bytes = key.offset;

                if (btrfs_item_size_nr(leaf, path->slots[0]) > sizeof(*ei)) {
                        ei = btrfs_item_ptr(leaf, path->slots[0],
                                            struct btrfs_extent_item);
                        if (btrfs_extent_flags(leaf, ei) &
                            BTRFS_EXTENT_FLAG_TREE_BLOCK) {
                                ret = add_metadata(bytenr, num_bytes,
                                                   &metadump);
                                if (ret) {
                                        fprintf(stderr, "Error adding block "
                                                "%d\n", ret);
                                        err = ret;
                                        goto out;
                                }
                        }
                } else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
                        ret = is_tree_block(extent_root, path, bytenr);
                        if (ret < 0) {
                                fprintf(stderr, "Error checking tree block "
                                        "%d\n", ret);
                                err = ret;
                                goto out;
                        }

                        if (ret) {
                                ret = add_metadata(bytenr, num_bytes,
                                                   &metadump);
                                if (ret) {
                                        fprintf(stderr, "Error adding block "
                                                "%d\n", ret);
                                        err = ret;
                                        goto out;
                                }
                        }
#else
                        fprintf(stderr, "Either extent tree corruption or "
                                "you haven't built with V0 support\n");
                        err = -EIO;
                        goto out;
#endif
                }
                bytenr += num_bytes;
        }

out:
        ret = flush_pending(&metadump, 1);
        if (ret) {
                /* don't lose the flush error if nothing failed earlier */
                if (!err)
                        err = ret;
                fprintf(stderr, "Error flushing pending %d\n", ret);
        }

        metadump_destroy(&metadump);

        btrfs_free_path(path);
        ret = close_ctree(root);
        return err ? err : ret;
}

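/*
 * Patch the restored superblock: flag it as a metadump and replace the
 * system chunk array with a single chunk that maps every logical address
 * 1:1 onto the target device.
 */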
static void update_super(u8 *buffer)
{
        struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
        struct btrfs_chunk *chunk;
        struct btrfs_disk_key *key;
        u32 sectorsize = btrfs_super_sectorsize(super);
        u64 flags = btrfs_super_flags(super);

        flags |= BTRFS_SUPER_FLAG_METADUMP;
        btrfs_set_super_flags(super, flags);

        key = (struct btrfs_disk_key *)(super->sys_chunk_array);
        chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
                                       sizeof(struct btrfs_disk_key));

        btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
        btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
        btrfs_set_disk_key_offset(key, 0);

        btrfs_set_stack_chunk_length(chunk, (u64)-1);
        btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
        btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024);
        btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
        btrfs_set_stack_chunk_io_align(chunk, sectorsize);
        btrfs_set_stack_chunk_io_width(chunk, sectorsize);
        btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
        btrfs_set_stack_chunk_num_stripes(chunk, 1);
        btrfs_set_stack_chunk_sub_stripes(chunk, 0);
        chunk->stripe.devid = super->dev_item.devid;
        chunk->stripe.offset = cpu_to_le64(0);
        memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
        btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
        csum_block(buffer, 4096);
}

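/*
 * Restore worker: take queued buffers off mdres->list, decompress them if
 * the image is compressed, fix up the superblock and write each buffer back
 * to its original byte offset in the output.
 */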
static void *restore_worker(void *data)
{
        struct mdrestore_struct *mdres = (struct mdrestore_struct *)data;
        struct async_work *async;
        size_t size;
        u8 *buffer;
        u8 *outbuf;
        int outfd;
        int ret;

        outfd = fileno(mdres->out);
        buffer = malloc(MAX_PENDING_SIZE * 2);
        if (!buffer) {
                fprintf(stderr, "Error allocating buffer\n");
                pthread_mutex_lock(&mdres->mutex);
                if (!mdres->error)
                        mdres->error = -ENOMEM;
                pthread_mutex_unlock(&mdres->mutex);
                goto out;
        }

        while (1) {
                int err = 0;

                pthread_mutex_lock(&mdres->mutex);
                while (list_empty(&mdres->list)) {
                        if (mdres->done) {
                                pthread_mutex_unlock(&mdres->mutex);
                                goto out;
                        }
                        pthread_cond_wait(&mdres->cond, &mdres->mutex);
                }
                async = list_entry(mdres->list.next, struct async_work, list);
                list_del_init(&async->list);
                pthread_mutex_unlock(&mdres->mutex);

                if (mdres->compress_method == COMPRESS_ZLIB) {
                        size = MAX_PENDING_SIZE * 2;
                        ret = uncompress(buffer, (unsigned long *)&size,
                                         async->buffer, async->bufsize);
                        if (ret != Z_OK) {
                                fprintf(stderr, "Error decompressing %d\n",
                                        ret);
                                err = -EIO;
                        }
                        outbuf = buffer;
                } else {
                        outbuf = async->buffer;
                        size = async->bufsize;
                }

                if (async->start == BTRFS_SUPER_INFO_OFFSET)
                        update_super(outbuf);

                ret = pwrite64(outfd, outbuf, size, async->start);
                /* ret is signed; compare carefully so errors are not missed */
                if (ret < 0 || (size_t)ret < size) {
                        if (ret < 0) {
                                fprintf(stderr, "Error writing to device %d\n",
                                        errno);
                                err = errno;
                        } else {
                                fprintf(stderr, "Short write\n");
                                err = -EIO;
                        }
                }

                pthread_mutex_lock(&mdres->mutex);
                if (err && !mdres->error)
                        mdres->error = err;
                mdres->num_items--;
                pthread_mutex_unlock(&mdres->mutex);

                free(async->buffer);
                free(async);
        }
out:
        free(buffer);
        pthread_exit(NULL);
}

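/* stop the restore workers, wait for them to exit and free their resources */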
static void mdrestore_destroy(struct mdrestore_struct *mdres)
{
        int i;
        pthread_mutex_lock(&mdres->mutex);
        mdres->done = 1;
        pthread_cond_broadcast(&mdres->cond);
        pthread_mutex_unlock(&mdres->mutex);

        for (i = 0; i < mdres->num_threads; i++)
                pthread_join(mdres->threads[i], NULL);

        pthread_cond_destroy(&mdres->cond);
        pthread_mutex_destroy(&mdres->mutex);
        free(mdres->threads);
}

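/* set up the restore context and start the restore worker threads */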
static int mdrestore_init(struct mdrestore_struct *mdres,
                          FILE *in, FILE *out, int num_threads)
{
        int i, ret = 0;

        memset(mdres, 0, sizeof(*mdres));
        pthread_cond_init(&mdres->cond, NULL);
        pthread_mutex_init(&mdres->mutex, NULL);
        INIT_LIST_HEAD(&mdres->list);
        mdres->in = in;
        mdres->out = out;

        if (!num_threads)
                return 0;

        mdres->num_threads = num_threads;
        mdres->threads = calloc(num_threads, sizeof(pthread_t));
        if (!mdres->threads)
                return -ENOMEM;
        for (i = 0; i < num_threads; i++) {
                ret = pthread_create(mdres->threads + i, NULL, restore_worker,
                                     mdres);
                if (ret)
                        break;
        }
        if (ret)
                mdrestore_destroy(mdres);
        return ret;
}

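/*
 * Parse one cluster of the image: read the data for each index item and
 * queue it for the restore workers, then skip the padding up to the next
 * BLOCK_SIZE boundary.  The offset of the next cluster is returned in *next.
 */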
static int add_cluster(struct meta_cluster *cluster,
                       struct mdrestore_struct *mdres, u64 *next)
{
        struct meta_cluster_item *item;
        struct meta_cluster_header *header = &cluster->header;
        struct async_work *async;
        u64 bytenr;
        u32 i, nritems;
        int ret;

        BUG_ON(mdres->num_items);
        mdres->compress_method = header->compress;

        bytenr = le64_to_cpu(header->bytenr) + BLOCK_SIZE;
        nritems = le32_to_cpu(header->nritems);
        for (i = 0; i < nritems; i++) {
                item = &cluster->items[i];
                async = calloc(1, sizeof(*async));
                if (!async) {
                        fprintf(stderr, "Error allocating async\n");
                        return -ENOMEM;
                }
                async->start = le64_to_cpu(item->bytenr);
                async->bufsize = le32_to_cpu(item->size);
                async->buffer = malloc(async->bufsize);
                if (!async->buffer) {
                        fprintf(stderr, "Error allocating async buffer\n");
                        free(async);
                        return -ENOMEM;
                }
                ret = fread(async->buffer, async->bufsize, 1, mdres->in);
                if (ret != 1) {
                        fprintf(stderr, "Error reading buffer %d\n", errno);
                        free(async->buffer);
                        free(async);
                        return -EIO;
                }
                bytenr += async->bufsize;

                pthread_mutex_lock(&mdres->mutex);
                list_add_tail(&async->list, &mdres->list);
                mdres->num_items++;
                pthread_cond_signal(&mdres->cond);
                pthread_mutex_unlock(&mdres->mutex);
        }
        if (bytenr & BLOCK_MASK) {
                char buffer[BLOCK_MASK];
                size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);

                bytenr += size;
                ret = fread(buffer, size, 1, mdres->in);
                if (ret != 1) {
                        fprintf(stderr, "Error reading in buffer %d\n", errno);
                        return -EIO;
                }
        }
        *next = bytenr;
        return 0;
}

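/* wait for the restore workers to drain the queue and return any error */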
static int wait_for_worker(struct mdrestore_struct *mdres)
{
        int ret = 0;

        pthread_mutex_lock(&mdres->mutex);
        ret = mdres->error;
        while (!ret && mdres->num_items > 0) {
                struct timespec ts = {
                        .tv_sec = 0,
                        .tv_nsec = 10000000,
                };
                pthread_mutex_unlock(&mdres->mutex);
                nanosleep(&ts, NULL);
                pthread_mutex_lock(&mdres->mutex);
                ret = mdres->error;
        }
        pthread_mutex_unlock(&mdres->mutex);
        return ret;
}

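/*
 * Read the image (a file or stdin) one cluster at a time, validate each
 * cluster header and hand the contents to the restore workers until EOF.
 */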
static int restore_metadump(const char *input, FILE *out, int num_threads)
{
        struct meta_cluster *cluster = NULL;
        struct meta_cluster_header *header;
        struct mdrestore_struct mdrestore;
        u64 bytenr = 0;
        FILE *in = NULL;
        int ret = 0;

        if (!strcmp(input, "-")) {
                in = stdin;
        } else {
                in = fopen(input, "r");
                if (!in) {
                        perror("unable to open metadump image");
                        return 1;
                }
        }

        cluster = malloc(BLOCK_SIZE);
        if (!cluster) {
                fprintf(stderr, "Error allocating cluster\n");
                if (in != stdin)
                        fclose(in);
                return -ENOMEM;
        }

        ret = mdrestore_init(&mdrestore, in, out, num_threads);
        if (ret) {
                fprintf(stderr, "Error initializing mdrestore %d\n", ret);
                if (in != stdin)
                        fclose(in);
                free(cluster);
                return ret;
        }

        while (1) {
                ret = fread(cluster, BLOCK_SIZE, 1, in);
                if (!ret)
                        break;

                header = &cluster->header;
                if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
                    le64_to_cpu(header->bytenr) != bytenr) {
                        fprintf(stderr, "bad header in metadump image\n");
                        ret = -EIO;
                        break;
                }
                ret = add_cluster(cluster, &mdrestore, &bytenr);
                if (ret) {
                        fprintf(stderr, "Error adding cluster\n");
                        break;
                }

                ret = wait_for_worker(&mdrestore);
                if (ret) {
                        fprintf(stderr, "One of the threads errored out %d\n",
                                ret);
                        break;
                }
        }

        mdrestore_destroy(&mdrestore);
        free(cluster);
        if (in != stdin)
                fclose(in);
        return ret;
}

static void print_usage(void)
{
        fprintf(stderr, "usage: btrfs-image [options] source target\n");
        fprintf(stderr, "\t-r      \trestore metadump image\n");
        fprintf(stderr, "\t-c value\tcompression level (0 ~ 9)\n");
        fprintf(stderr, "\t-t value\tnumber of threads (1 ~ 32)\n");
        exit(1);
}

int main(int argc, char *argv[])
{
        char *source;
        char *target;
        int num_threads = 0;
        int compress_level = 0;
        int create = 1;
        int ret;
        FILE *out;

        while (1) {
                int c = getopt(argc, argv, "rc:t:");
                if (c < 0)
                        break;
                switch (c) {
                case 'r':
                        create = 0;
                        break;
                case 't':
                        num_threads = atoi(optarg);
                        if (num_threads <= 0 || num_threads > 32)
                                print_usage();
                        break;
                case 'c':
                        compress_level = atoi(optarg);
                        if (compress_level < 0 || compress_level > 9)
                                print_usage();
                        break;
                default:
                        print_usage();
                }
        }

        argc = argc - optind;
        if (argc != 2)
                print_usage();
        source = argv[optind];
        target = argv[optind + 1];

        if (create && !strcmp(target, "-")) {
                out = stdout;
        } else {
                out = fopen(target, "w+");
                if (!out) {
                        perror("unable to create target file");
                        exit(1);
                }
        }

        if (num_threads == 0 && compress_level > 0) {
                num_threads = sysconf(_SC_NPROCESSORS_ONLN);
                if (num_threads <= 0)
                        num_threads = 1;
        }

        if (create)
                ret = create_metadump(source, out, num_threads,
                                      compress_level);
        else
                ret = restore_metadump(source, out, 1);

        if (out == stdout)
                fflush(out);
        else
                fclose(out);

        return ret;
}