265cbe620837a5bc2c24932a4414813142f8b22e
[platform/upstream/btrfs-progs.git] / btrfs-image.c
1 /*
2  * Copyright (C) 2008 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #define _XOPEN_SOURCE 500
20 #define _GNU_SOURCE 1
21 #include <pthread.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <sys/types.h>
25 #include <sys/stat.h>
26 #include <fcntl.h>
27 #include <unistd.h>
28 #include <dirent.h>
29 #include <zlib.h>
30 #include "kerncompat.h"
31 #include "crc32c.h"
32 #include "ctree.h"
33 #include "disk-io.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "version.h"
37 #include "volumes.h"
38
39 #define HEADER_MAGIC            0xbd5c25e27295668bULL
40 #define MAX_PENDING_SIZE        (256 * 1024)
41 #define BLOCK_SIZE              1024
42 #define BLOCK_MASK              (BLOCK_SIZE - 1)
43
44 #define COMPRESS_NONE           0
45 #define COMPRESS_ZLIB           1
46
/* On-disk index entry: one contiguous run of bytes stored in a cluster. */
struct meta_cluster_item {
	__le64 bytenr;	/* logical start of the run in the filesystem */
	__le32 size;	/* stored byte count (compressed size when zlib is used) */
} __attribute__ ((__packed__));
51
/* Header at the start of every BLOCK_SIZE-aligned cluster in the image. */
struct meta_cluster_header {
	__le64 magic;	/* HEADER_MAGIC identifies a metadump cluster */
	__le64 bytenr;	/* byte offset of this cluster in the image stream */
	__le32 nritems;	/* number of meta_cluster_items that follow */
	u8 compress;	/* COMPRESS_NONE or COMPRESS_ZLIB */
} __attribute__ ((__packed__));
58
/*
 * cluster header + index items + buffers
 *
 * The whole structure occupies exactly BLOCK_SIZE bytes in the image;
 * header.nritems tells how many entries of items[] are valid.
 */
struct meta_cluster {
	struct meta_cluster_header header;
	struct meta_cluster_item items[];
} __attribute__ ((__packed__));
64
65 #define ITEMS_PER_CLUSTER ((BLOCK_SIZE - sizeof(struct meta_cluster)) / \
66                            sizeof(struct meta_cluster_item))
67
/*
 * One pending run of extents to be (optionally compressed and) written
 * out.  Queued on metadump_struct->list for the compressor threads and
 * on ->ordered to preserve output order.
 */
struct async_work {
	struct list_head list;		/* entry in metadump_struct->list (work queue) */
	struct list_head ordered;	/* entry in metadump_struct->ordered (output order) */
	u64 start;			/* logical start of the run */
	u64 size;			/* uncompressed byte count */
	u8 *buffer;			/* run contents; replaced by compressed data */
	size_t bufsize;			/* current size of buffer */
	int error;			/* set by a worker on compression failure */
};
77
/* State for one dump run; shared between the main thread and the workers. */
struct metadump_struct {
	struct btrfs_root *root;	/* filesystem being dumped */
	FILE *out;			/* image output stream */

	struct meta_cluster *cluster;	/* BLOCK_SIZE staging area for the current cluster */

	pthread_t *threads;		/* compressor thread handles */
	size_t num_threads;
	pthread_mutex_t mutex;		/* guards list/ordered/num_* and done */
	pthread_cond_t cond;		/* signalled when work is queued or done is set */

	struct list_head list;		/* work queue consumed by dump_worker() */
	struct list_head ordered;	/* same items, kept in output order */
	size_t num_items;		/* items queued for the current cluster */
	size_t num_ready;		/* items the workers have finished */

	u64 pending_start;		/* start of the run being coalesced; (u64)-1 = none */
	u64 pending_size;		/* length of that run; 0 = nothing pending */

	int compress_level;		/* zlib level; 0 disables compression */
	int done;			/* tells the workers to exit */
	int data;			/* the pending run is file data, not metadata */
};
101
/*
 * State for restoring an image; mirrors metadump_struct for the reverse
 * direction.  (The restore worker code is not in this chunk — field
 * roles below follow the metadump counterparts; verify against the
 * restore path.)
 */
struct mdrestore_struct {
	FILE *in;			/* image input stream */
	FILE *out;			/* target device/file stream */

	pthread_t *threads;
	size_t num_threads;
	pthread_mutex_t mutex;		/* guards list/num_items/done/error */
	pthread_cond_t cond;

	struct list_head list;		/* work queue for the restore threads */
	size_t num_items;

	int compress_method;		/* COMPRESS_* as recorded in the cluster headers */
	int done;
	int error;			/* first error encountered by a worker */
};
118
119 static void csum_block(u8 *buf, size_t len)
120 {
121         char result[BTRFS_CRC32_SIZE];
122         u32 crc = ~(u32)0;
123         crc = crc32c(crc, buf + BTRFS_CSUM_SIZE, len - BTRFS_CSUM_SIZE);
124         btrfs_csum_final(crc, result);
125         memcpy(buf, result, BTRFS_CRC32_SIZE);
126 }
127
/*
 * zero inline extents and csum items
 *
 * The image only needs tree structure, so blank the payload of checksum
 * items and of inline file extents in the copied leaf @dst (this also
 * makes the image compress well).  @src is the original, unmodified
 * leaf used to locate the items.
 */
static void zero_items(u8 *dst, struct extent_buffer *src)
{
	struct btrfs_file_extent_item *fi;
	struct btrfs_item *item;
	struct btrfs_key key;
	u32 nritems = btrfs_header_nritems(src);
	size_t size;
	unsigned long ptr;
	int i, extent_type;

	for (i = 0; i < nritems; i++) {
		item = btrfs_item_nr(src, i);
		btrfs_item_key_to_cpu(src, &key, i);
		/* checksum items: wipe the whole item payload */
		if (key.type == BTRFS_CSUM_ITEM_KEY) {
			size = btrfs_item_size_nr(src, i);
			memset(dst + btrfs_leaf_data(src) +
			       btrfs_item_offset_nr(src, i), 0, size);
			continue;
		}
		if (key.type != BTRFS_EXTENT_DATA_KEY)
			continue;

		fi = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(src, fi);
		/* only inline extents embed file data inside the leaf */
		if (extent_type != BTRFS_FILE_EXTENT_INLINE)
			continue;

		ptr = btrfs_file_extent_inline_start(fi);
		size = btrfs_file_extent_inline_item_len(src, item);
		memset(dst + ptr, 0, size);
	}
}
163
/*
 * copy buffer and zero useless data in the buffer
 *
 * Copy @src into @dst and blank everything the image doesn't need:
 * for an empty block everything past the header, for a leaf the unused
 * gap between the item headers and the item data (plus inline extents
 * and csums via zero_items()), for a node everything after the last key
 * pointer.  The checksum is recomputed since the contents changed.  A
 * superblock copy is returned untouched.
 */
static void copy_buffer(u8 *dst, struct extent_buffer *src)
{
	int level;
	size_t size;
	u32 nritems;

	memcpy(dst, src->data, src->len);
	if (src->start == BTRFS_SUPER_INFO_OFFSET)
		return;

	level = btrfs_header_level(src);
	nritems = btrfs_header_nritems(src);

	if (nritems == 0) {
		/* empty block: only the header is meaningful */
		size = sizeof(struct btrfs_header);
		memset(dst + size, 0, src->len - size);
	} else if (level == 0) {
		/* leaf: zero from the end of the item headers up to the
		 * start of the last (lowest-offset) item's data */
		size = btrfs_leaf_data(src) +
			btrfs_item_offset_nr(src, nritems - 1) -
			btrfs_item_nr_offset(nritems);
		memset(dst + btrfs_item_nr_offset(nritems), 0, size);
		zero_items(dst, src);
	} else {
		/* node: zero everything after the last key pointer */
		size = offsetof(struct btrfs_node, ptrs) +
			sizeof(struct btrfs_key_ptr) * nritems;
		memset(dst + size, 0, src->len - size);
	}
	csum_block(dst, src->len);
}
196
/*
 * Compressor thread body.  Pulls async_work items off md->list; when
 * compression is enabled, each item's buffer is replaced with its
 * zlib-compressed form via compress2().  Every processed item bumps
 * md->num_ready so write_buffers() can tell when a cluster is complete.
 * Exits once md->done is set and the queue is drained.
 */
static void *dump_worker(void *data)
{
	struct metadump_struct *md = (struct metadump_struct *)data;
	struct async_work *async;
	int ret;

	while (1) {
		pthread_mutex_lock(&md->mutex);
		while (list_empty(&md->list)) {
			if (md->done) {
				pthread_mutex_unlock(&md->mutex);
				goto out;
			}
			pthread_cond_wait(&md->cond, &md->mutex);
		}
		async = list_entry(md->list.next, struct async_work, list);
		list_del_init(&async->list);
		pthread_mutex_unlock(&md->mutex);

		if (md->compress_level > 0) {
			u8 *orig = async->buffer;

			async->bufsize = compressBound(async->size);
			async->buffer = malloc(async->bufsize);
			if (!async->buffer) {
				/*
				 * Allocation failed: restore the original
				 * buffer so nothing downstream dereferences
				 * a NULL pointer, and flag the failure.
				 * (Previously compress2() was called with a
				 * NULL destination here.)
				 */
				async->buffer = orig;
				async->bufsize = async->size;
				async->error = 1;
			} else {
				ret = compress2(async->buffer,
						(unsigned long *)&async->bufsize,
						orig, async->size,
						md->compress_level);
				if (ret != Z_OK)
					async->error = 1;
				free(orig);
			}
		}

		pthread_mutex_lock(&md->mutex);
		md->num_ready++;
		pthread_mutex_unlock(&md->mutex);
	}
out:
	pthread_exit(NULL);
}
239
240 static void meta_cluster_init(struct metadump_struct *md, u64 start)
241 {
242         struct meta_cluster_header *header;
243
244         md->num_items = 0;
245         md->num_ready = 0;
246         header = &md->cluster->header;
247         header->magic = cpu_to_le64(HEADER_MAGIC);
248         header->bytenr = cpu_to_le64(start);
249         header->nritems = cpu_to_le32(0);
250         header->compress = md->compress_level > 0 ?
251                            COMPRESS_ZLIB : COMPRESS_NONE;
252 }
253
/*
 * Prepare @md for a dump of @root written to @out.  When @num_threads
 * is non-zero, that many dump_worker() threads are started; their
 * compression behavior is controlled by @compress_level (zlib level,
 * 0 = no compression).
 *
 * Returns 0 on success.  On failure every partially-initialized
 * resource is torn down; the return value is -ENOMEM or the (positive)
 * pthread_create() error code.
 */
static int metadump_init(struct metadump_struct *md, struct btrfs_root *root,
			 FILE *out, int num_threads, int compress_level)
{
	int i, ret;

	memset(md, 0, sizeof(*md));
	pthread_cond_init(&md->cond, NULL);
	pthread_mutex_init(&md->mutex, NULL);
	INIT_LIST_HEAD(&md->list);
	INIT_LIST_HEAD(&md->ordered);
	md->root = root;
	md->out = out;
	/* (u64)-1 marks "no pending run" for add_extent() */
	md->pending_start = (u64)-1;
	md->compress_level = compress_level;
	md->cluster = calloc(1, BLOCK_SIZE);
	if (!md->cluster) {
		pthread_cond_destroy(&md->cond);
		pthread_mutex_destroy(&md->mutex);
		return -ENOMEM;
	}

	meta_cluster_init(md, 0);
	/* single-threaded operation: no worker threads needed */
	if (!num_threads)
		return 0;

	md->num_threads = num_threads;
	md->threads = calloc(num_threads, sizeof(pthread_t));
	if (!md->threads) {
		free(md->cluster);
		pthread_cond_destroy(&md->cond);
		pthread_mutex_destroy(&md->mutex);
		return -ENOMEM;
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(md->threads + i, NULL, dump_worker, md);
		if (ret)
			break;
	}

	/* creation failed part-way: signal done and join the ones started */
	if (ret) {
		pthread_mutex_lock(&md->mutex);
		md->done = 1;
		pthread_cond_broadcast(&md->cond);
		pthread_mutex_unlock(&md->mutex);

		for (i--; i >= 0; i--)
			pthread_join(md->threads[i], NULL);

		pthread_cond_destroy(&md->cond);
		pthread_mutex_destroy(&md->mutex);
		free(md->cluster);
		free(md->threads);
	}

	return ret;
}
311
312 static void metadump_destroy(struct metadump_struct *md)
313 {
314         int i;
315         pthread_mutex_lock(&md->mutex);
316         md->done = 1;
317         pthread_cond_broadcast(&md->cond);
318         pthread_mutex_unlock(&md->mutex);
319
320         for (i = 0; i < md->num_threads; i++)
321                 pthread_join(md->threads[i], NULL);
322
323         pthread_cond_destroy(&md->cond);
324         pthread_mutex_destroy(&md->mutex);
325         free(md->threads);
326         free(md->cluster);
327 }
328
329 static int write_zero(FILE *out, size_t size)
330 {
331         static char zero[BLOCK_SIZE];
332         return fwrite(zero, size, 1, out);
333 }
334
335 static int write_buffers(struct metadump_struct *md, u64 *next)
336 {
337         struct meta_cluster_header *header = &md->cluster->header;
338         struct meta_cluster_item *item;
339         struct async_work *async;
340         u64 bytenr = 0;
341         u32 nritems = 0;
342         int ret;
343         int err = 0;
344
345         if (list_empty(&md->ordered))
346                 goto out;
347
348         /* wait until all buffers are compressed */
349         while (md->num_items > md->num_ready) {
350                 struct timespec ts = {
351                         .tv_sec = 0,
352                         .tv_nsec = 10000000,
353                 };
354                 pthread_mutex_unlock(&md->mutex);
355                 nanosleep(&ts, NULL);
356                 pthread_mutex_lock(&md->mutex);
357         }
358
359         /* setup and write index block */
360         list_for_each_entry(async, &md->ordered, ordered) {
361                 item = md->cluster->items + nritems;
362                 item->bytenr = cpu_to_le64(async->start);
363                 item->size = cpu_to_le32(async->bufsize);
364                 nritems++;
365         }
366         header->nritems = cpu_to_le32(nritems);
367
368         ret = fwrite(md->cluster, BLOCK_SIZE, 1, md->out);
369         if (ret != 1) {
370                 fprintf(stderr, "Error writing out cluster: %d\n", errno);
371                 return -EIO;
372         }
373
374         /* write buffers */
375         bytenr += le64_to_cpu(header->bytenr) + BLOCK_SIZE;
376         while (!list_empty(&md->ordered)) {
377                 async = list_entry(md->ordered.next, struct async_work,
378                                    ordered);
379                 list_del_init(&async->ordered);
380
381                 bytenr += async->bufsize;
382                 if (!err)
383                         ret = fwrite(async->buffer, async->bufsize, 1,
384                                      md->out);
385                 if (ret != 1) {
386                         err = -EIO;
387                         ret = 0;
388                         fprintf(stderr, "Error writing out cluster: %d\n",
389                                 errno);
390                 }
391
392                 free(async->buffer);
393                 free(async);
394         }
395
396         /* zero unused space in the last block */
397         if (!err && bytenr & BLOCK_MASK) {
398                 size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);
399
400                 bytenr += size;
401                 ret = write_zero(md->out, size);
402                 if (ret != 1) {
403                         fprintf(stderr, "Error zeroing out buffer: %d\n",
404                                 errno);
405                         err = -EIO;
406                 }
407         }
408 out:
409         *next = bytenr;
410         return err;
411 }
412
413 static int read_data_extent(struct metadump_struct *md,
414                             struct async_work *async)
415 {
416         struct btrfs_multi_bio *multi = NULL;
417         struct btrfs_device *device;
418         u64 bytes_left = async->size;
419         u64 logical = async->start;
420         u64 offset = 0;
421         u64 bytenr;
422         u64 read_len;
423         ssize_t done;
424         int fd;
425         int ret;
426
427         while (bytes_left) {
428                 read_len = bytes_left;
429                 ret = btrfs_map_block(&md->root->fs_info->mapping_tree, READ,
430                                       logical, &read_len, &multi, 0, NULL);
431                 if (ret) {
432                         fprintf(stderr, "Couldn't map data block %d\n", ret);
433                         return ret;
434                 }
435
436                 device = multi->stripes[0].dev;
437
438                 if (device->fd == 0) {
439                         fprintf(stderr,
440                                 "Device we need to read from is not open\n");
441                         free(multi);
442                         return -EIO;
443                 }
444                 fd = device->fd;
445                 bytenr = multi->stripes[0].physical;
446                 free(multi);
447
448                 read_len = min(read_len, bytes_left);
449                 done = pread64(fd, async->buffer+offset, read_len, bytenr);
450                 if (done < read_len) {
451                         if (done < 0)
452                                 fprintf(stderr, "Error reading extent %d\n",
453                                         errno);
454                         else
455                                 fprintf(stderr, "Short read\n");
456                         return -EIO;
457                 }
458
459                 bytes_left -= done;
460                 offset += done;
461                 logical += done;
462         }
463
464         return 0;
465 }
466
/*
 * Package the pending run [md->pending_start, md->pending_start +
 * md->pending_size) into an async_work item and queue it.  Metadata
 * runs are re-read block by block and sanitized via copy_buffer();
 * data runs are read raw from disk via read_data_extent().
 *
 * When @done is set, or a full cluster's worth of items has
 * accumulated, the cluster is written out and re-initialized at the
 * next output offset.  Returns 0 or a negative error code.
 */
static int flush_pending(struct metadump_struct *md, int done)
{
	struct async_work *async = NULL;
	struct extent_buffer *eb;
	u64 blocksize = md->root->nodesize;
	u64 start;
	u64 size;
	size_t offset;
	int ret = 0;

	if (md->pending_size) {
		async = calloc(1, sizeof(*async));
		if (!async)
			return -ENOMEM;

		async->start = md->pending_start;
		async->size = md->pending_size;
		async->bufsize = async->size;
		async->buffer = malloc(async->bufsize);
		if (!async->buffer) {
			free(async);
			return -ENOMEM;
		}
		offset = 0;
		start = async->start;
		size = async->size;

		/* data extents are copied verbatim from the devices */
		if (md->data) {
			ret = read_data_extent(md, async);
			if (ret) {
				free(async->buffer);
				free(async);
				return ret;
			}
		}

		/* metadata: copy one tree block at a time, sanitized */
		while (!md->data && size > 0) {
			eb = read_tree_block(md->root, start, blocksize, 0);
			if (!eb) {
				free(async->buffer);
				free(async);
				fprintf(stderr,
					"Error reading metadata block\n");
				return -EIO;
			}
			copy_buffer(async->buffer + offset, eb);
			free_extent_buffer(eb);
			start += blocksize;
			offset += blocksize;
			size -= blocksize;
		}

		/* (u64)-1 marks "nothing pending" for add_extent() */
		md->pending_start = (u64)-1;
		md->pending_size = 0;
	} else if (!done) {
		return 0;
	}

	pthread_mutex_lock(&md->mutex);
	if (async) {
		list_add_tail(&async->ordered, &md->ordered);
		md->num_items++;
		if (md->compress_level > 0) {
			/* hand the buffer to a compressor thread */
			list_add_tail(&async->list, &md->list);
			pthread_cond_signal(&md->cond);
		} else {
			md->num_ready++;
		}
	}
	if (md->num_items >= ITEMS_PER_CLUSTER || done) {
		/* write_buffers() sets start to the next cluster offset */
		ret = write_buffers(md, &start);
		if (ret)
			fprintf(stderr, "Error writing buffers %d\n",
				errno);
		else
			meta_cluster_init(md, start);
	}
	pthread_mutex_unlock(&md->mutex);
	return ret;
}
547
548 static int add_extent(u64 start, u64 size, struct metadump_struct *md,
549                       int data)
550 {
551         int ret;
552         if (md->data != data ||
553             md->pending_size + size > MAX_PENDING_SIZE ||
554             md->pending_start + md->pending_size != start) {
555                 ret = flush_pending(md, 0);
556                 if (ret)
557                         return ret;
558                 md->pending_start = start;
559         }
560         readahead_tree_block(md->root, start, size, 0);
561         md->pending_size += size;
562         md->data = data;
563         return 0;
564 }
565
566 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Decide whether the extent at @bytenr is a tree block by scanning the
 * v0 extent refs that follow the current extent item in @path: a ref
 * whose owner objectid is below BTRFS_FIRST_FREE_OBJECTID belongs to a
 * tree root, i.e. metadata.  Advances path->slots[0] while scanning.
 * Returns 1 for a tree block, 0 for data, negative on error.
 */
static int is_tree_block(struct btrfs_root *extent_root,
			 struct btrfs_path *path, u64 bytenr)
{
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 ref_objectid;
	int ret;

	leaf = path->nodes[0];
	while (1) {
		struct btrfs_extent_ref_v0 *ref_item;
		path->slots[0]++;
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				return ret;
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		/* walked past the refs belonging to this extent */
		if (key.objectid != bytenr)
			break;
		if (key.type != BTRFS_EXTENT_REF_V0_KEY)
			continue;
		ref_item = btrfs_item_ptr(leaf, path->slots[0],
					  struct btrfs_extent_ref_v0);
		ref_objectid = btrfs_ref_objectid_v0(leaf, ref_item);
		/* low objectids are reserved for trees, not files */
		if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID)
			return 1;
		break;
	}
	return 0;
}
601 #endif
602
/*
 * Recursively queue every block of a tree log for dumping.  @eb is the
 * current block.  When walking the log root tree itself
 * (@log_root_tree non-zero), leaf items of type BTRFS_ROOT_ITEM_KEY
 * are followed into the per-subvolume log trees; interior nodes are
 * descended in all cases.  Returns 0 or a negative error code.
 */
static int copy_log_blocks(struct btrfs_root *root, struct extent_buffer *eb,
			   struct metadump_struct *metadump,
			   int log_root_tree)
{
	struct extent_buffer *tmp;
	struct btrfs_root_item *ri;
	struct btrfs_key key;
	u64 bytenr;
	int level;
	int nritems = 0;
	int i = 0;
	int ret;

	/* queue this block itself */
	ret = add_extent(btrfs_header_bytenr(eb), root->leafsize, metadump, 0);
	if (ret) {
		fprintf(stderr, "Error adding metadata block\n");
		return ret;
	}

	/* a leaf of an ordinary log tree has nothing further to follow */
	if (btrfs_header_level(eb) == 0 && !log_root_tree)
		return 0;

	level = btrfs_header_level(eb);
	nritems = btrfs_header_nritems(eb);
	for (i = 0; i < nritems; i++) {
		if (level == 0) {
			/* log root tree leaf: descend into each log root */
			btrfs_item_key_to_cpu(eb, &key, i);
			if (key.type != BTRFS_ROOT_ITEM_KEY)
				continue;
			ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
			bytenr = btrfs_disk_root_bytenr(eb, ri);
			tmp = read_tree_block(root, bytenr, root->leafsize, 0);
			if (!tmp) {
				fprintf(stderr,
					"Error reading log root block\n");
				return -EIO;
			}
			ret = copy_log_blocks(root, tmp, metadump, 0);
			free_extent_buffer(tmp);
			if (ret)
				return ret;
		} else {
			/* interior node: recurse into every child block */
			bytenr = btrfs_node_blockptr(eb, i);
			tmp = read_tree_block(root, bytenr, root->leafsize, 0);
			if (!tmp) {
				fprintf(stderr, "Error reading log block\n");
				return -EIO;
			}
			ret = copy_log_blocks(root, tmp, metadump,
					      log_root_tree);
			free_extent_buffer(tmp);
			if (ret)
				return ret;
		}
	}

	return 0;
}
661
662 static int copy_log_trees(struct btrfs_root *root,
663                           struct metadump_struct *metadump,
664                           struct btrfs_path *path)
665 {
666         u64 blocknr = btrfs_super_log_root(root->fs_info->super_copy);
667
668         if (blocknr == 0)
669                 return 0;
670
671         if (!root->fs_info->log_root_tree ||
672             !root->fs_info->log_root_tree->node) {
673                 fprintf(stderr, "Error copying tree log, it wasn't setup\n");
674                 return -EIO;
675         }
676
677         return copy_log_blocks(root, root->fs_info->log_root_tree->node,
678                                metadump, 1);
679 }
680
/*
 * Queue the data extents backing the free space cache inodes, which
 * live in the tree root, so the restored image has valid cache
 * contents.  Scans the tree root for EXTENT_DATA items and adds every
 * regular (non-inline) extent as data.  Returns 0 or a negative error.
 *
 * NOTE(review): @path is released here only on the add_extent error
 * path; the other returns leave it for the caller to free — confirm
 * this matches the caller's expectations.
 */
static int copy_space_cache(struct btrfs_root *root,
			    struct metadump_struct *metadump,
			    struct btrfs_path *path)
{
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 bytenr, num_bytes;
	int ret;

	root = root->fs_info->tree_root;

	key.objectid = 0;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0) {
		fprintf(stderr, "Error searching for free space inode %d\n",
			ret);
		return ret;
	}

	while (1) {
		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				fprintf(stderr, "Error going to next leaf "
					"%d\n", ret);
				return ret;
			}
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.type != BTRFS_EXTENT_DATA_KEY) {
			path->slots[0]++;
			continue;
		}

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		/* only on-disk (regular) extents have data to copy */
		if (btrfs_file_extent_type(leaf, fi) !=
		    BTRFS_FILE_EXTENT_REG) {
			path->slots[0]++;
			continue;
		}

		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
		ret = add_extent(bytenr, num_bytes, metadump, 1);
		if (ret) {
			fprintf(stderr, "Error adding space cache blocks %d\n",
				ret);
			btrfs_release_path(root, path);
			return ret;
		}
		path->slots[0]++;
	}

	return 0;
}
746
747 static int create_metadump(const char *input, FILE *out, int num_threads,
748                            int compress_level)
749 {
750         struct btrfs_root *root;
751         struct btrfs_root *extent_root;
752         struct btrfs_path *path = NULL;
753         struct extent_buffer *leaf;
754         struct btrfs_extent_item *ei;
755         struct btrfs_key key;
756         struct metadump_struct metadump;
757         u64 bytenr;
758         u64 num_bytes;
759         int ret;
760         int err = 0;
761
762         root = open_ctree(input, 0, 0);
763         if (!root) {
764                 fprintf(stderr, "Open ctree failed\n");
765                 return -EIO;
766         }
767
768         BUG_ON(root->nodesize != root->leafsize);
769
770         ret = metadump_init(&metadump, root, out, num_threads,
771                             compress_level);
772         if (ret) {
773                 fprintf(stderr, "Error initing metadump %d\n", ret);
774                 close_ctree(root);
775                 return ret;
776         }
777
778         ret = add_extent(BTRFS_SUPER_INFO_OFFSET, 4096, &metadump, 0);
779         if (ret) {
780                 fprintf(stderr, "Error adding metadata %d\n", ret);
781                 err = ret;
782                 goto out;
783         }
784
785         extent_root = root->fs_info->extent_root;
786         path = btrfs_alloc_path();
787         if (!path) {
788                 fprintf(stderr, "Out of memory allocing path\n");
789                 err = -ENOMEM;
790                 goto out;
791         }
792         bytenr = BTRFS_SUPER_INFO_OFFSET + 4096;
793         key.objectid = bytenr;
794         key.type = BTRFS_EXTENT_ITEM_KEY;
795         key.offset = 0;
796
797         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
798         if (ret < 0) {
799                 fprintf(stderr, "Error searching extent root %d\n", ret);
800                 err = ret;
801                 goto out;
802         }
803
804         while (1) {
805                 leaf = path->nodes[0];
806                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
807                         ret = btrfs_next_leaf(extent_root, path);
808                         if (ret < 0) {
809                                 fprintf(stderr, "Error going to next leaf %d"
810                                         "\n", ret);
811                                 err = ret;
812                                 goto out;
813                         }
814                         if (ret > 0)
815                                 break;
816                         leaf = path->nodes[0];
817                 }
818
819                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
820                 if (key.objectid < bytenr ||
821                     key.type != BTRFS_EXTENT_ITEM_KEY) {
822                         path->slots[0]++;
823                         continue;
824                 }
825
826                 bytenr = key.objectid;
827                 num_bytes = key.offset;
828
829                 if (btrfs_item_size_nr(leaf, path->slots[0]) > sizeof(*ei)) {
830                         ei = btrfs_item_ptr(leaf, path->slots[0],
831                                             struct btrfs_extent_item);
832                         if (btrfs_extent_flags(leaf, ei) &
833                             BTRFS_EXTENT_FLAG_TREE_BLOCK) {
834                                 ret = add_extent(bytenr, num_bytes, &metadump,
835                                                  0);
836                                 if (ret) {
837                                         fprintf(stderr, "Error adding block "
838                                                 "%d\n", ret);
839                                         err = ret;
840                                         goto out;
841                                 }
842                         }
843                 } else {
844 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
845                         ret = is_tree_block(extent_root, path, bytenr);
846                         if (ret < 0) {
847                                 fprintf(stderr, "Error checking tree block "
848                                         "%d\n", ret);
849                                 err = ret;
850                                 goto out;
851                         }
852
853                         if (ret) {
854                                 ret = add_extent(bytenr, num_bytes, &metadump,
855                                                  0);
856                                 if (ret) {
857                                         fprintf(stderr, "Error adding block "
858                                                 "%d\n", ret);
859                                         err = ret;
860                                         goto out;
861                                 }
862                         }
863 #else
864                         fprintf(stderr, "Either extent tree corruption or "
865                                 "you haven't built with V0 support\n");
866                         err = -EIO;
867                         goto out;
868 #endif
869                 }
870                 bytenr += num_bytes;
871         }
872
873         btrfs_release_path(root, path);
874
875         ret = copy_log_trees(root, &metadump, path);
876         if (ret) {
877                 err = ret;
878                 goto out;
879         }
880
881         ret = copy_space_cache(root, &metadump, path);
882 out:
883         ret = flush_pending(&metadump, 1);
884         if (ret) {
885                 if (!err)
886                         ret = err;
887                 fprintf(stderr, "Error flushing pending %d\n", ret);
888         }
889
890         metadump_destroy(&metadump);
891
892         btrfs_free_path(path);
893         ret = close_ctree(root);
894         return err ? err : ret;
895 }
896
/*
 * Rewrite the super block held in @buffer so the restored image works as a
 * single-device filesystem: flag it as a metadump and replace the system
 * chunk array with one chunk mapping the whole address space 1:1 onto this
 * device.  The super's checksum is recomputed at the end.
 */
static void update_super(u8 *buffer)
{
	struct btrfs_super_block *super = (struct btrfs_super_block *)buffer;
	struct btrfs_chunk *chunk;
	struct btrfs_disk_key *key;
	u32 sectorsize = btrfs_super_sectorsize(super);
	u64 flags = btrfs_super_flags(super);

	/* Mark the super so tools can tell this is a restored metadump. */
	flags |= BTRFS_SUPER_FLAG_METADUMP;
	btrfs_set_super_flags(super, flags);

	/* sys_chunk_array holds (disk_key, chunk) pairs back to back. */
	key = (struct btrfs_disk_key *)(super->sys_chunk_array);
	chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
				       sizeof(struct btrfs_disk_key));

	btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
	btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
	btrfs_set_disk_key_offset(key, 0);

	/* Single stripe on this device, covering everything. */
	btrfs_set_stack_chunk_length(chunk, (u64)-1);
	btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
	btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024);
	btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
	btrfs_set_stack_chunk_io_align(chunk, sectorsize);
	btrfs_set_stack_chunk_io_width(chunk, sectorsize);
	btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
	btrfs_set_stack_chunk_num_stripes(chunk, 1);
	btrfs_set_stack_chunk_sub_stripes(chunk, 0);
	chunk->stripe.devid = super->dev_item.devid;
	chunk->stripe.offset = cpu_to_le64(0);
	memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
	btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
	/* 4096 is the super block size; recompute its checksum in place. */
	csum_block(buffer, 4096);
}
931
932 static void *restore_worker(void *data)
933 {
934         struct mdrestore_struct *mdres = (struct mdrestore_struct *)data;
935         struct async_work *async;
936         size_t size;
937         u8 *buffer;
938         u8 *outbuf;
939         int outfd;
940         int ret;
941
942         outfd = fileno(mdres->out);
943         buffer = malloc(MAX_PENDING_SIZE * 2);
944         if (!buffer) {
945                 fprintf(stderr, "Error allocing buffer\n");
946                 pthread_mutex_lock(&mdres->mutex);
947                 if (!mdres->error)
948                         mdres->error = -ENOMEM;
949                 pthread_mutex_unlock(&mdres->mutex);
950                 goto out;
951         }
952
953         while (1) {
954                 int err = 0;
955
956                 pthread_mutex_lock(&mdres->mutex);
957                 while (list_empty(&mdres->list)) {
958                         if (mdres->done) {
959                                 pthread_mutex_unlock(&mdres->mutex);
960                                 goto out;
961                         }
962                         pthread_cond_wait(&mdres->cond, &mdres->mutex);
963                 }
964                 async = list_entry(mdres->list.next, struct async_work, list);
965                 list_del_init(&async->list);
966                 pthread_mutex_unlock(&mdres->mutex);
967
968                 if (mdres->compress_method == COMPRESS_ZLIB) {
969                         size = MAX_PENDING_SIZE * 2;
970                         ret = uncompress(buffer, (unsigned long *)&size,
971                                          async->buffer, async->bufsize);
972                         if (ret != Z_OK) {
973                                 fprintf(stderr, "Error decompressing %d\n",
974                                         ret);
975                                 err = -EIO;
976                         }
977                         outbuf = buffer;
978                 } else {
979                         outbuf = async->buffer;
980                         size = async->bufsize;
981                 }
982
983                 if (async->start == BTRFS_SUPER_INFO_OFFSET)
984                         update_super(outbuf);
985
986                 ret = pwrite64(outfd, outbuf, size, async->start);
987                 if (ret < size) {
988                         if (ret < 0) {
989                                 fprintf(stderr, "Error writing to device %d\n",
990                                         errno);
991                                 err = errno;
992                         } else {
993                                 fprintf(stderr, "Short write\n");
994                                 err = -EIO;
995                         }
996                 }
997
998                 pthread_mutex_lock(&mdres->mutex);
999                 if (err && !mdres->error)
1000                         mdres->error = err;
1001                 mdres->num_items--;
1002                 pthread_mutex_unlock(&mdres->mutex);
1003
1004                 free(async->buffer);
1005                 free(async);
1006         }
1007 out:
1008         free(buffer);
1009         pthread_exit(NULL);
1010 }
1011
1012 static void mdrestore_destroy(struct mdrestore_struct *mdres)
1013 {
1014         int i;
1015         pthread_mutex_lock(&mdres->mutex);
1016         mdres->done = 1;
1017         pthread_cond_broadcast(&mdres->cond);
1018         pthread_mutex_unlock(&mdres->mutex);
1019
1020         for (i = 0; i < mdres->num_threads; i++)
1021                 pthread_join(mdres->threads[i], NULL);
1022
1023         pthread_cond_destroy(&mdres->cond);
1024         pthread_mutex_destroy(&mdres->mutex);
1025         free(mdres->threads);
1026 }
1027
1028 static int mdrestore_init(struct mdrestore_struct *mdres,
1029                           FILE *in, FILE *out, int num_threads)
1030 {
1031         int i, ret = 0;
1032
1033         memset(mdres, 0, sizeof(*mdres));
1034         pthread_cond_init(&mdres->cond, NULL);
1035         pthread_mutex_init(&mdres->mutex, NULL);
1036         INIT_LIST_HEAD(&mdres->list);
1037         mdres->in = in;
1038         mdres->out = out;
1039
1040         if (!num_threads)
1041                 return 0;
1042
1043         mdres->num_threads = num_threads;
1044         mdres->threads = calloc(num_threads, sizeof(pthread_t));
1045         if (!mdres->threads)
1046                 return -ENOMEM;
1047         for (i = 0; i < num_threads; i++) {
1048                 ret = pthread_create(mdres->threads + i, NULL, restore_worker,
1049                                      mdres);
1050                 if (ret)
1051                         break;
1052         }
1053         if (ret)
1054                 mdrestore_destroy(mdres);
1055         return ret;
1056 }
1057
1058 static int add_cluster(struct meta_cluster *cluster,
1059                        struct mdrestore_struct *mdres, u64 *next)
1060 {
1061         struct meta_cluster_item *item;
1062         struct meta_cluster_header *header = &cluster->header;
1063         struct async_work *async;
1064         u64 bytenr;
1065         u32 i, nritems;
1066         int ret;
1067
1068         BUG_ON(mdres->num_items);
1069         mdres->compress_method = header->compress;
1070
1071         bytenr = le64_to_cpu(header->bytenr) + BLOCK_SIZE;
1072         nritems = le32_to_cpu(header->nritems);
1073         for (i = 0; i < nritems; i++) {
1074                 item = &cluster->items[i];
1075                 async = calloc(1, sizeof(*async));
1076                 if (!async) {
1077                         fprintf(stderr, "Error allocating async\n");
1078                         return -ENOMEM;
1079                 }
1080                 async->start = le64_to_cpu(item->bytenr);
1081                 async->bufsize = le32_to_cpu(item->size);
1082                 async->buffer = malloc(async->bufsize);
1083                 if (!async->buffer) {
1084                         fprintf(stderr, "Error allocing async buffer\n");
1085                         free(async);
1086                         return -ENOMEM;
1087                 }
1088                 ret = fread(async->buffer, async->bufsize, 1, mdres->in);
1089                 if (ret != 1) {
1090                         fprintf(stderr, "Error reading buffer %d\n", errno);
1091                         free(async->buffer);
1092                         free(async);
1093                         return -EIO;
1094                 }
1095                 bytenr += async->bufsize;
1096
1097                 pthread_mutex_lock(&mdres->mutex);
1098                 list_add_tail(&async->list, &mdres->list);
1099                 mdres->num_items++;
1100                 pthread_cond_signal(&mdres->cond);
1101                 pthread_mutex_unlock(&mdres->mutex);
1102         }
1103         if (bytenr & BLOCK_MASK) {
1104                 char buffer[BLOCK_MASK];
1105                 size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK);
1106
1107                 bytenr += size;
1108                 ret = fread(buffer, size, 1, mdres->in);
1109                 if (ret != 1) {
1110                         fprintf(stderr, "Error reading in buffer %d\n", errno);
1111                         return -EIO;
1112                 }
1113         }
1114         *next = bytenr;
1115         return 0;
1116 }
1117
1118 static int wait_for_worker(struct mdrestore_struct *mdres)
1119 {
1120         int ret = 0;
1121
1122         pthread_mutex_lock(&mdres->mutex);
1123         ret = mdres->error;
1124         while (!ret && mdres->num_items > 0) {
1125                 struct timespec ts = {
1126                         .tv_sec = 0,
1127                         .tv_nsec = 10000000,
1128                 };
1129                 pthread_mutex_unlock(&mdres->mutex);
1130                 nanosleep(&ts, NULL);
1131                 pthread_mutex_lock(&mdres->mutex);
1132                 ret = mdres->error;
1133         }
1134         pthread_mutex_unlock(&mdres->mutex);
1135         return ret;
1136 }
1137
1138 static int restore_metadump(const char *input, FILE *out, int num_threads)
1139 {
1140         struct meta_cluster *cluster = NULL;
1141         struct meta_cluster_header *header;
1142         struct mdrestore_struct mdrestore;
1143         u64 bytenr = 0;
1144         FILE *in = NULL;
1145         int ret = 0;
1146
1147         if (!strcmp(input, "-")) {
1148                 in = stdin;
1149         } else {
1150                 in = fopen(input, "r");
1151                 if (!in) {
1152                         perror("unable to open metadump image");
1153                         return 1;
1154                 }
1155         }
1156
1157         cluster = malloc(BLOCK_SIZE);
1158         if (!cluster) {
1159                 fprintf(stderr, "Error allocating cluster\n");
1160                 if (in != stdin)
1161                         fclose(in);
1162                 return -ENOMEM;
1163         }
1164
1165         ret = mdrestore_init(&mdrestore, in, out, num_threads);
1166         if (ret) {
1167                 fprintf(stderr, "Error initing mdrestore %d\n", ret);
1168                 if (in != stdin)
1169                         fclose(in);
1170                 free(cluster);
1171                 return ret;
1172         }
1173
1174         while (1) {
1175                 ret = fread(cluster, BLOCK_SIZE, 1, in);
1176                 if (!ret)
1177                         break;
1178
1179                 header = &cluster->header;
1180                 if (le64_to_cpu(header->magic) != HEADER_MAGIC ||
1181                     le64_to_cpu(header->bytenr) != bytenr) {
1182                         fprintf(stderr, "bad header in metadump image\n");
1183                         ret = -EIO;
1184                         break;
1185                 }
1186                 ret = add_cluster(cluster, &mdrestore, &bytenr);
1187                 if (ret) {
1188                         fprintf(stderr, "Error adding cluster\n");
1189                         break;
1190                 }
1191
1192                 ret = wait_for_worker(&mdrestore);
1193                 if (ret) {
1194                         fprintf(stderr, "One of the threads errored out %d\n",
1195                                 ret);
1196                         break;
1197                 }
1198         }
1199
1200         mdrestore_destroy(&mdrestore);
1201         free(cluster);
1202         if (in != stdin)
1203                 fclose(in);
1204         return ret;
1205 }
1206
/* Print the command-line help text on stderr and exit with failure. */
static void print_usage(void)
{
	fputs("usage: btrfs-image [options] source target\n", stderr);
	fputs("\t-r      \trestore metadump image\n", stderr);
	fputs("\t-c value\tcompression level (0 ~ 9)\n", stderr);
	fputs("\t-t value\tnumber of threads (1 ~ 32)\n", stderr);
	exit(1);
}
1215
1216 int main(int argc, char *argv[])
1217 {
1218         char *source;
1219         char *target;
1220         int num_threads = 0;
1221         int compress_level = 0;
1222         int create = 1;
1223         int ret;
1224         FILE *out;
1225
1226         while (1) {
1227                 int c = getopt(argc, argv, "rc:t:");
1228                 if (c < 0)
1229                         break;
1230                 switch (c) {
1231                 case 'r':
1232                         create = 0;
1233                         break;
1234                 case 't':
1235                         num_threads = atoi(optarg);
1236                         if (num_threads <= 0 || num_threads > 32)
1237                                 print_usage();
1238                         break;
1239                 case 'c':
1240                         compress_level = atoi(optarg);
1241                         if (compress_level < 0 || compress_level > 9)
1242                                 print_usage();
1243                         break;
1244                 default:
1245                         print_usage();
1246                 }
1247         }
1248
1249         argc = argc - optind;
1250         if (argc != 2)
1251                 print_usage();
1252         source = argv[optind];
1253         target = argv[optind + 1];
1254
1255         if (create && !strcmp(target, "-")) {
1256                 out = stdout;
1257         } else {
1258                 out = fopen(target, "w+");
1259                 if (!out) {
1260                         perror("unable to create target file");
1261                         exit(1);
1262                 }
1263         }
1264
1265         if (num_threads == 0 && compress_level > 0) {
1266                 num_threads = sysconf(_SC_NPROCESSORS_ONLN);
1267                 if (num_threads <= 0)
1268                         num_threads = 1;
1269         }
1270
1271         if (create)
1272                 ret = create_metadump(source, out, num_threads,
1273                                       compress_level);
1274         else
1275                 ret = restore_metadump(source, out, 1);
1276
1277         if (out == stdout)
1278                 fflush(out);
1279         else
1280                 fclose(out);
1281
1282         return ret;
1283 }