btrfs-progs: convert: Introduce function to calculate the available space
[platform/upstream/btrfs-progs.git] / btrfs-convert.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
17  */
18
19 #include "kerncompat.h"
20
21 #include <sys/ioctl.h>
22 #include <sys/mount.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <fcntl.h>
28 #include <unistd.h>
29 #include <uuid/uuid.h>
30 #include <linux/limits.h>
31 #include <getopt.h>
32
33 #include "ctree.h"
34 #include "disk-io.h"
35 #include "volumes.h"
36 #include "transaction.h"
37 #include "crc32c.h"
38 #include "utils.h"
39 #include "task-utils.h"
40 #include <ext2fs/ext2_fs.h>
41 #include <ext2fs/ext2fs.h>
42 #include <ext2fs/ext2_ext_attr.h>
43
44 #define INO_OFFSET (BTRFS_FIRST_FREE_OBJECTID - EXT2_ROOT_INO)
45 #define CONV_IMAGE_SUBVOL_OBJECTID BTRFS_FIRST_FREE_OBJECTID
46
47 /*
48  * Compatibility code for e2fsprogs 1.41 which doesn't support RO compat flag
49  * BIGALLOC.
50  * Unlike normal RO compat flag, BIGALLOC affects how e2fsprogs check used
51  * space, and btrfs-convert heavily relies on it.
52  */
53 #ifdef HAVE_OLD_E2FSPROGS
54 #define EXT2FS_CLUSTER_RATIO(fs)        (1)
55 #define EXT2_CLUSTERS_PER_GROUP(s)      (EXT2_BLOCKS_PER_GROUP(s))
56 #define EXT2FS_B2C(fs, blk)             (blk)
57 #endif
58
/*
 * Progress state handed to the inode-copy progress reporter;
 * print_copied_inodes() reads both counters on every refresh.
 */
struct task_ctx {
	/* Denominator shown in the "copy inodes" progress line */
	uint32_t max_copy_inodes;
	/* Numerator shown in the "copy inodes" progress line */
	uint32_t cur_copy_inodes;
	/* Handle passed to task_period_start() / task_period_wait() */
	struct task_info *info;
};
64
65 static void *print_copied_inodes(void *p)
66 {
67         struct task_ctx *priv = p;
68         const char work_indicator[] = { '.', 'o', 'O', 'o' };
69         uint32_t count = 0;
70
71         task_period_start(priv->info, 1000 /* 1s */);
72         while (1) {
73                 count++;
74                 printf("copy inodes [%c] [%10d/%10d]\r",
75                        work_indicator[count % 4], priv->cur_copy_inodes,
76                        priv->max_copy_inodes);
77                 fflush(stdout);
78                 task_period_wait(priv->info);
79         }
80
81         return NULL;
82 }
83
/*
 * Terminate the in-place progress line by emitting a newline and flushing
 * stdout.  @p is unused.  Always returns 0.
 */
static int after_copied_inodes(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);
	return 0;
}
91
struct btrfs_convert_context;
/*
 * Callback table for one source filesystem type.  The inline convert_*()
 * helpers and copy_inodes() in this file call through cctx->convert_ops
 * into these hooks, always passing the context as first argument.
 */
struct btrfs_convert_operations {
	/* NOTE(review): presumably a label for the source fs type - confirm */
	const char *name;
	/* Open the filesystem on @devname and fill in @cctx */
	int (*open_fs)(struct btrfs_convert_context *cctx, const char *devname);
	/* Collect the used space of the source filesystem into @cctx */
	int (*read_used_space)(struct btrfs_convert_context *cctx);
	/* Allocate one block, preferably at @goal; result in @block_ret */
	int (*alloc_block)(struct btrfs_convert_context *cctx, u64 goal,
			   u64 *block_ret);
	/* Allocate @num blocks as one range; first block in @block_ret */
	int (*alloc_block_range)(struct btrfs_convert_context *cctx, u64 goal,
			   int num, u64 *block_ret);
	/* Query the allocation state of @block */
	int (*test_block)(struct btrfs_convert_context *cctx, u64 block);
	/* Release one block */
	void (*free_block)(struct btrfs_convert_context *cctx, u64 block);
	/* Release @num blocks starting at @block */
	void (*free_block_range)(struct btrfs_convert_context *cctx, u64 block,
			   int num);
	/* Copy the source inodes into @root; @p carries progress counters */
	int (*copy_inodes)(struct btrfs_convert_context *cctx,
			 struct btrfs_root *root, int datacsum,
			 int packing, int noxattr, struct task_ctx *p);
	/* Release whatever open_fs() acquired */
	void (*close_fs)(struct btrfs_convert_context *cctx);
};
110
/*
 * State describing the filesystem being converted.  For ext2 the geometry
 * fields are filled in by ext2_open_fs() from the ext2 superblock.
 */
struct btrfs_convert_context {
	/* Block size of the source filesystem, in bytes */
	u32 blocksize;
	/* Number of the first data block of the source filesystem */
	u32 first_data_block;
	/* Total number of blocks of the source filesystem */
	u32 block_count;
	/* Total and free inode counts of the source filesystem */
	u32 inodes_count;
	u32 free_inodes_count;
	/* Size of the source filesystem in bytes (blocksize * block count) */
	u64 total_bytes;
	/* Heap copy of the volume label; released in ext2_close_fs() */
	char *volume_name;
	/* Source-filesystem specific callbacks */
	const struct btrfs_convert_operations *convert_ops;

	/* The accurate used space of old filesystem */
	struct cache_tree used;

	/* Batched ranges which must be covered by data chunks */
	struct cache_tree data_chunks;

	/* Free space which is not covered by data_chunks */
	struct cache_tree free;

	/* Backend private handle (an ext2_filsys for the ext2 backend) */
	void *fs_data;
};
132
133 static void init_convert_context(struct btrfs_convert_context *cctx)
134 {
135         cache_tree_init(&cctx->used);
136         cache_tree_init(&cctx->data_chunks);
137         cache_tree_init(&cctx->free);
138 }
139
140 static void clean_convert_context(struct btrfs_convert_context *cctx)
141 {
142         free_extent_cache_tree(&cctx->used);
143         free_extent_cache_tree(&cctx->data_chunks);
144         free_extent_cache_tree(&cctx->free);
145 }
146
147 static inline int convert_alloc_block(struct btrfs_convert_context *cctx,
148                                       u64 goal, u64 *ret)
149 {
150         return  cctx->convert_ops->alloc_block(cctx, goal, ret);
151 }
152
153 static inline int convert_alloc_block_range(struct btrfs_convert_context *cctx,
154                                       u64 goal, int num, u64 *ret)
155 {
156         return  cctx->convert_ops->alloc_block_range(cctx, goal, num, ret);
157 }
158
159 static inline int convert_test_block(struct btrfs_convert_context *cctx,
160                                      u64 block)
161 {
162         return cctx->convert_ops->test_block(cctx, block);
163 }
164
165 static inline void convert_free_block(struct btrfs_convert_context *cctx,
166                                       u64 block)
167 {
168         cctx->convert_ops->free_block(cctx, block);
169 }
170
171 static inline void convert_free_block_range(struct btrfs_convert_context *cctx,
172                                       u64 block, int num)
173 {
174         cctx->convert_ops->free_block_range(cctx, block, num);
175 }
176
177 static inline int copy_inodes(struct btrfs_convert_context *cctx,
178                               struct btrfs_root *root, int datacsum,
179                               int packing, int noxattr, struct task_ctx *p)
180 {
181         return cctx->convert_ops->copy_inodes(cctx, root, datacsum, packing,
182                                              noxattr, p);
183 }
184
185 static inline void convert_close_fs(struct btrfs_convert_context *cctx)
186 {
187         cctx->convert_ops->close_fs(cctx);
188 }
189
190 /*
191  * Open Ext2fs in readonly mode, read block allocation bitmap and
192  * inode bitmap into memory.
193  */
194 static int ext2_open_fs(struct btrfs_convert_context *cctx, const char *name)
195 {
196         errcode_t ret;
197         ext2_filsys ext2_fs;
198         ext2_ino_t ino;
199         u32 ro_feature;
200
201         ret = ext2fs_open(name, 0, 0, 0, unix_io_manager, &ext2_fs);
202         if (ret) {
203                 fprintf(stderr, "ext2fs_open: %s\n", error_message(ret));
204                 return -1;
205         }
206         /*
207          * We need to know exactly the used space, some RO compat flags like
208          * BIGALLOC will affect how used space is present.
209          * So we need manuall check any unsupported RO compat flags
210          */
211         ro_feature = ext2_fs->super->s_feature_ro_compat;
212         if (ro_feature & ~EXT2_LIB_FEATURE_RO_COMPAT_SUPP) {
213                 error(
214 "unsupported RO features detected: %x, abort convert to avoid possible corruption",
215                       ro_feature & ~EXT2_LIB_FEATURE_COMPAT_SUPP);
216                 goto fail;
217         }
218         ret = ext2fs_read_inode_bitmap(ext2_fs);
219         if (ret) {
220                 fprintf(stderr, "ext2fs_read_inode_bitmap: %s\n",
221                         error_message(ret));
222                 goto fail;
223         }
224         ret = ext2fs_read_block_bitmap(ext2_fs);
225         if (ret) {
226                 fprintf(stderr, "ext2fs_read_block_bitmap: %s\n",
227                         error_message(ret));
228                 goto fail;
229         }
230         /*
231          * search each block group for a free inode. this set up
232          * uninit block/inode bitmaps appropriately.
233          */
234         ino = 1;
235         while (ino <= ext2_fs->super->s_inodes_count) {
236                 ext2_ino_t foo;
237                 ext2fs_new_inode(ext2_fs, ino, 0, NULL, &foo);
238                 ino += EXT2_INODES_PER_GROUP(ext2_fs->super);
239         }
240
241         if (!(ext2_fs->super->s_feature_incompat &
242               EXT2_FEATURE_INCOMPAT_FILETYPE)) {
243                 fprintf(stderr, "filetype feature is missing\n");
244                 goto fail;
245         }
246
247         cctx->fs_data = ext2_fs;
248         cctx->blocksize = ext2_fs->blocksize;
249         cctx->block_count = ext2_fs->super->s_blocks_count;
250         cctx->total_bytes = ext2_fs->blocksize * ext2_fs->super->s_blocks_count;
251         cctx->volume_name = strndup(ext2_fs->super->s_volume_name, 16);
252         cctx->first_data_block = ext2_fs->super->s_first_data_block;
253         cctx->inodes_count = ext2_fs->super->s_inodes_count;
254         cctx->free_inodes_count = ext2_fs->super->s_free_inodes_count;
255         return 0;
256 fail:
257         ext2fs_close(ext2_fs);
258         return -1;
259 }
260
261 static int __ext2_add_one_block(ext2_filsys fs, char *bitmap,
262                                 unsigned long group_nr, struct cache_tree *used)
263 {
264         unsigned long offset;
265         unsigned i;
266         int ret = 0;
267
268         offset = fs->super->s_first_data_block;
269         offset /= EXT2FS_CLUSTER_RATIO(fs);
270         offset += group_nr * EXT2_CLUSTERS_PER_GROUP(fs->super);
271         for (i = 0; i < EXT2_CLUSTERS_PER_GROUP(fs->super); i++) {
272                 if (ext2fs_test_bit(i, bitmap)) {
273                         u64 start;
274
275                         start = (i + offset) * EXT2FS_CLUSTER_RATIO(fs);
276                         start *= fs->blocksize;
277                         ret = add_merge_cache_extent(used, start,
278                                                      fs->blocksize);
279                         if (ret < 0)
280                                 break;
281                 }
282         }
283         return ret;
284 }
285
286 /*
287  * Read all used ext2 space into cctx->used cache tree
288  */
289 static int ext2_read_used_space(struct btrfs_convert_context *cctx)
290 {
291         ext2_filsys fs = (ext2_filsys)cctx->fs_data;
292         blk64_t blk_itr = EXT2FS_B2C(fs, fs->super->s_first_data_block);
293         struct cache_tree *used_tree = &cctx->used;
294         char *block_bitmap = NULL;
295         unsigned long i;
296         int block_nbytes;
297         int ret = 0;
298
299         block_nbytes = EXT2_CLUSTERS_PER_GROUP(fs->super) / 8;
300         /* Shouldn't happen */
301         BUG_ON(!fs->block_map);
302
303         block_bitmap = malloc(block_nbytes);
304         if (!block_bitmap)
305                 return -ENOMEM;
306
307         for (i = 0; i < fs->group_desc_count; i++) {
308                 ret = ext2fs_get_block_bitmap_range(fs->block_map, blk_itr,
309                                                 block_nbytes * 8, block_bitmap);
310                 if (ret) {
311                         error("fail to get bitmap from ext2, %s",
312                               strerror(-ret));
313                         break;
314                 }
315                 ret = __ext2_add_one_block(fs, block_bitmap, i, used_tree);
316                 if (ret < 0) {
317                         error("fail to build used space tree, %s",
318                               strerror(-ret));
319                         break;
320                 }
321                 blk_itr += EXT2_CLUSTERS_PER_GROUP(fs->super);
322         }
323
324         free(block_bitmap);
325         return ret;
326 }
327
328 static void ext2_close_fs(struct btrfs_convert_context *cctx)
329 {
330         if (cctx->volume_name) {
331                 free(cctx->volume_name);
332                 cctx->volume_name = NULL;
333         }
334         ext2fs_close(cctx->fs_data);
335 }
336
337 static int ext2_alloc_block(struct btrfs_convert_context *cctx,
338                             u64 goal, u64 *block_ret)
339 {
340         ext2_filsys fs = cctx->fs_data;
341         blk_t block;
342
343         if (!ext2fs_new_block(fs, goal, NULL, &block)) {
344                 ext2fs_fast_mark_block_bitmap(fs->block_map, block);
345                 *block_ret = block;
346                 return 0;
347         }
348         return -ENOSPC;
349 }
350
351 static int ext2_alloc_block_range(struct btrfs_convert_context *cctx, u64 goal,
352                 int num, u64 *block_ret)
353 {
354         ext2_filsys fs = cctx->fs_data;
355         blk_t block;
356         ext2fs_block_bitmap bitmap = fs->block_map;
357         blk_t start = ext2fs_get_block_bitmap_start(bitmap);
358         blk_t end = ext2fs_get_block_bitmap_end(bitmap);
359
360         for (block = max_t(u64, goal, start); block + num < end; block++) {
361                 if (ext2fs_fast_test_block_bitmap_range(bitmap, block, num)) {
362                         ext2fs_fast_mark_block_bitmap_range(bitmap, block,
363                                         num);
364                         *block_ret = block;
365                         return 0;
366                 }
367         }
368         return -ENOSPC;
369 }
370
371 static void ext2_free_block(struct btrfs_convert_context *cctx, u64 block)
372 {
373         ext2_filsys fs = cctx->fs_data;
374
375         BUG_ON(block != (blk_t)block);
376         ext2fs_fast_unmark_block_bitmap(fs->block_map, block);
377 }
378
379 static void ext2_free_block_range(struct btrfs_convert_context *cctx, u64 block, int num)
380 {
381         ext2_filsys fs = cctx->fs_data;
382
383         BUG_ON(block != (blk_t)block);
384         ext2fs_fast_unmark_block_bitmap_range(fs->block_map, block, num);
385 }
386
387 static int cache_free_extents(struct btrfs_root *root,
388                               struct btrfs_convert_context *cctx)
389
390 {
391         int i, ret = 0;
392         blk_t block;
393         u64 bytenr;
394         u64 blocksize = cctx->blocksize;
395
396         block = cctx->first_data_block;
397         for (; block < cctx->block_count; block++) {
398                 if (convert_test_block(cctx, block))
399                         continue;
400                 bytenr = block * blocksize;
401                 ret = set_extent_dirty(&root->fs_info->free_space_cache,
402                                        bytenr, bytenr + blocksize - 1, 0);
403                 BUG_ON(ret);
404         }
405
406         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
407                 bytenr = btrfs_sb_offset(i);
408                 bytenr &= ~((u64)BTRFS_STRIPE_LEN - 1);
409                 if (bytenr >= blocksize * cctx->block_count)
410                         break;
411                 clear_extent_dirty(&root->fs_info->free_space_cache, bytenr,
412                                    bytenr + BTRFS_STRIPE_LEN - 1, 0);
413         }
414
415         clear_extent_dirty(&root->fs_info->free_space_cache,
416                            0, BTRFS_SUPER_INFO_OFFSET - 1, 0);
417
418         return 0;
419 }
420
/*
 * Extent allocator used as extent_ops.alloc_extent.
 *
 * Walks the EXTENT_DIRTY ranges of root->fs_info->free_space_cache starting
 * at @hint_byte; when the search runs out of ranges it restarts once from
 * offset 0.  A candidate of @num_bytes is rejected when test_range_bit()
 * reports EXTENT_DIRTY for it in pinned_extents, when its block group has
 * BTRFS_BLOCK_GROUP_SYSTEM set or ends before the candidate does, or, for
 * @metadata allocations, when check_crossing_stripes() reports it.
 *
 * On success the chosen range is cleared from free_space_cache, described
 * in @ins (objectid = start, offset = @num_bytes, type
 * BTRFS_EXTENT_ITEM_KEY) and 0 is returned.  Otherwise a message is printed
 * and -ENOSPC is returned.
 */
static int custom_alloc_extent(struct btrfs_root *root, u64 num_bytes,
			       u64 hint_byte, struct btrfs_key *ins,
			       int metadata)
{
	u64 start;
	u64 end;
	u64 last = hint_byte;
	int ret;
	int wrapped = 0;
	struct btrfs_block_group_cache *cache;

	while(1) {
		ret = find_first_extent_bit(&root->fs_info->free_space_cache,
					    last, &start, &end, EXTENT_DIRTY);
		if (ret) {
			/* Nothing at or after @last: retry from 0, but only once */
			if (wrapped++ == 0) {
				last = 0;
				continue;
			} else {
				goto fail;
			}
		}

		/* Never hand out anything below the current search position */
		start = max(last, start);
		last = end + 1;
		if (last - start < num_bytes)
			continue;

		/* From here on [start, last) is the candidate extent */
		last = start + num_bytes;
		if (test_range_bit(&root->fs_info->pinned_extents,
				   start, last - 1, EXTENT_DIRTY, 0))
			continue;

		cache = btrfs_lookup_block_group(root->fs_info, start);
		BUG_ON(!cache);
		/* Skip to the end of the block group if it cannot be used */
		if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM ||
		    last > cache->key.objectid + cache->key.offset) {
			last = cache->key.objectid + cache->key.offset;
			continue;
		}

		if (metadata) {
			BUG_ON(num_bytes != root->nodesize);
			/* Retry from the stripe boundary the candidate crosses */
			if (check_crossing_stripes(start, num_bytes)) {
				last = round_down(start + num_bytes,
						  BTRFS_STRIPE_LEN);
				continue;
			}
		}
		clear_extent_dirty(&root->fs_info->free_space_cache,
				   start, start + num_bytes - 1, 0);

		ins->objectid = start;
		ins->offset = num_bytes;
		ins->type = BTRFS_EXTENT_ITEM_KEY;
		return 0;
	}
fail:
	fprintf(stderr, "not enough free space\n");
	return -ENOSPC;
}
482
483 static int intersect_with_sb(u64 bytenr, u64 num_bytes)
484 {
485         int i;
486         u64 offset;
487
488         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
489                 offset = btrfs_sb_offset(i);
490                 offset &= ~((u64)BTRFS_STRIPE_LEN - 1);
491
492                 if (bytenr < offset + BTRFS_STRIPE_LEN &&
493                     bytenr + num_bytes > offset)
494                         return 1;
495         }
496         return 0;
497 }
498
499 static int custom_free_extent(struct btrfs_root *root, u64 bytenr,
500                               u64 num_bytes)
501 {
502         return intersect_with_sb(bytenr, num_bytes);
503 }
504
/*
 * Extent hooks of the converter: allocation is served by
 * custom_alloc_extent(), freeing by custom_free_extent().
 * NOTE(review): the place where this table gets installed is not visible
 * in this part of the file.
 */
static struct btrfs_extent_ops extent_ops = {
	.alloc_extent = custom_alloc_extent,
	.free_extent = custom_free_extent,
};
509
510 static int convert_insert_dirent(struct btrfs_trans_handle *trans,
511                                  struct btrfs_root *root,
512                                  const char *name, size_t name_len,
513                                  u64 dir, u64 objectid,
514                                  u8 file_type, u64 index_cnt,
515                                  struct btrfs_inode_item *inode)
516 {
517         int ret;
518         u64 inode_size;
519         struct btrfs_key location = {
520                 .objectid = objectid,
521                 .offset = 0,
522                 .type = BTRFS_INODE_ITEM_KEY,
523         };
524
525         ret = btrfs_insert_dir_item(trans, root, name, name_len,
526                                     dir, &location, file_type, index_cnt);
527         if (ret)
528                 return ret;
529         ret = btrfs_insert_inode_ref(trans, root, name, name_len,
530                                      objectid, dir, index_cnt);
531         if (ret)
532                 return ret;
533         inode_size = btrfs_stack_inode_size(inode) + name_len * 2;
534         btrfs_set_stack_inode_size(inode, inode_size);
535
536         return 0;
537 }
538
/* Private state passed to dir_iterate_proc() while walking one directory */
struct dir_iterate_data {
	/* Transaction, root and in-memory inode item of the directory */
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root;
	struct btrfs_inode_item *inode;
	/* btrfs objectid of the directory being populated */
	u64 objectid;
	/* Index given to the next entry; create_dir_entries() starts at 2 */
	u64 index_cnt;
	/* btrfs objectid of the ".." target, 0 until ".." has been seen */
	u64 parent;
	/* Negative result of convert_insert_dirent() that aborted the walk */
	int errcode;
};
548
/*
 * Maps ext2 directory entry file types (EXT2_FT_*, used as index) to the
 * corresponding btrfs ones (BTRFS_FT_*).  dir_iterate_proc() indexes it
 * with the high byte of dirent->name_len.
 */
static u8 filetype_conversion_table[EXT2_FT_MAX] = {
	[EXT2_FT_UNKNOWN]	= BTRFS_FT_UNKNOWN,
	[EXT2_FT_REG_FILE]	= BTRFS_FT_REG_FILE,
	[EXT2_FT_DIR]		= BTRFS_FT_DIR,
	[EXT2_FT_CHRDEV]	= BTRFS_FT_CHRDEV,
	[EXT2_FT_BLKDEV]	= BTRFS_FT_BLKDEV,
	[EXT2_FT_FIFO]		= BTRFS_FT_FIFO,
	[EXT2_FT_SOCK]		= BTRFS_FT_SOCK,
	[EXT2_FT_SYMLINK]	= BTRFS_FT_SYMLINK,
};
559
560 static int dir_iterate_proc(ext2_ino_t dir, int entry,
561                             struct ext2_dir_entry *dirent,
562                             int offset, int blocksize,
563                             char *buf,void *priv_data)
564 {
565         int ret;
566         int file_type;
567         u64 objectid;
568         char dotdot[] = "..";
569         struct dir_iterate_data *idata = (struct dir_iterate_data *)priv_data;
570         int name_len;
571
572         name_len = dirent->name_len & 0xFF;
573
574         objectid = dirent->inode + INO_OFFSET;
575         if (!strncmp(dirent->name, dotdot, name_len)) {
576                 if (name_len == 2) {
577                         BUG_ON(idata->parent != 0);
578                         idata->parent = objectid;
579                 }
580                 return 0;
581         }
582         if (dirent->inode < EXT2_GOOD_OLD_FIRST_INO)
583                 return 0;
584
585         file_type = dirent->name_len >> 8;
586         BUG_ON(file_type > EXT2_FT_SYMLINK);
587
588         ret = convert_insert_dirent(idata->trans, idata->root, dirent->name,
589                                     name_len, idata->objectid, objectid,
590                                     filetype_conversion_table[file_type],
591                                     idata->index_cnt, idata->inode);
592         if (ret < 0) {
593                 idata->errcode = ret;
594                 return BLOCK_ABORT;
595         }
596
597         idata->index_cnt++;
598         return 0;
599 }
600
601 static int create_dir_entries(struct btrfs_trans_handle *trans,
602                               struct btrfs_root *root, u64 objectid,
603                               struct btrfs_inode_item *btrfs_inode,
604                               ext2_filsys ext2_fs, ext2_ino_t ext2_ino)
605 {
606         int ret;
607         errcode_t err;
608         struct dir_iterate_data data = {
609                 .trans          = trans,
610                 .root           = root,
611                 .inode          = btrfs_inode,
612                 .objectid       = objectid,
613                 .index_cnt      = 2,
614                 .parent         = 0,
615                 .errcode        = 0,
616         };
617
618         err = ext2fs_dir_iterate2(ext2_fs, ext2_ino, 0, NULL,
619                                   dir_iterate_proc, &data);
620         if (err)
621                 goto error;
622         ret = data.errcode;
623         if (ret == 0 && data.parent == objectid) {
624                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
625                                              objectid, objectid, 0);
626         }
627         return ret;
628 error:
629         fprintf(stderr, "ext2fs_dir_iterate2: %s\n", error_message(err));
630         return -1;
631 }
632
633 static int read_disk_extent(struct btrfs_root *root, u64 bytenr,
634                             u32 num_bytes, char *buffer)
635 {
636         int ret;
637         struct btrfs_fs_devices *fs_devs = root->fs_info->fs_devices;
638
639         ret = pread(fs_devs->latest_bdev, buffer, num_bytes, bytenr);
640         if (ret != num_bytes)
641                 goto fail;
642         ret = 0;
643 fail:
644         if (ret > 0)
645                 ret = -1;
646         return ret;
647 }
648
649 static int csum_disk_extent(struct btrfs_trans_handle *trans,
650                             struct btrfs_root *root,
651                             u64 disk_bytenr, u64 num_bytes)
652 {
653         u32 blocksize = root->sectorsize;
654         u64 offset;
655         char *buffer;
656         int ret = 0;
657
658         buffer = malloc(blocksize);
659         if (!buffer)
660                 return -ENOMEM;
661         for (offset = 0; offset < num_bytes; offset += blocksize) {
662                 ret = read_disk_extent(root, disk_bytenr + offset,
663                                         blocksize, buffer);
664                 if (ret)
665                         break;
666                 ret = btrfs_csum_file_block(trans,
667                                             root->fs_info->csum_root,
668                                             disk_bytenr + num_bytes,
669                                             disk_bytenr + offset,
670                                             buffer, blocksize);
671                 if (ret)
672                         break;
673         }
674         free(buffer);
675         return ret;
676 }
677
/*
 * State used while turning the data blocks of one file into file extents.
 * first_block / disk_block / num_blocks describe the run of blocks that
 * has been collected but not yet recorded.
 */
struct blk_iterate_data {
	/* Transaction, root, in-memory inode item and objectid of the file */
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root;
	struct btrfs_inode_item *inode;
	u64 objectid;
	/* File block at which the pending run starts */
	u64 first_block;
	/* Disk block at which the pending run starts */
	u64 disk_block;
	/* Length of the pending run, in blocks */
	u64 num_blocks;
	/* Disk block at which the pending run must be cut; (u64)-1 initially */
	u64 boundary;
	/* Non-zero: checksum recorded extents (see record_file_blocks()) */
	int checksum;
	/* Error stored by __block_iterate_proc() when it aborts the walk */
	int errcode;
};
690
691 static void init_blk_iterate_data(struct blk_iterate_data *data,
692                                   struct btrfs_trans_handle *trans,
693                                   struct btrfs_root *root,
694                                   struct btrfs_inode_item *inode,
695                                   u64 objectid, int checksum)
696 {
697         data->trans             = trans;
698         data->root              = root;
699         data->inode             = inode;
700         data->objectid          = objectid;
701         data->first_block       = 0;
702         data->disk_block        = 0;
703         data->num_blocks        = 0;
704         data->boundary          = (u64)-1;
705         data->checksum          = checksum;
706         data->errcode           = 0;
707 }
708
709 static int record_file_blocks(struct blk_iterate_data *data,
710                               u64 file_block, u64 disk_block, u64 num_blocks)
711 {
712         int ret;
713         struct btrfs_root *root = data->root;
714         u64 file_pos = file_block * root->sectorsize;
715         u64 disk_bytenr = disk_block * root->sectorsize;
716         u64 num_bytes = num_blocks * root->sectorsize;
717         ret = btrfs_record_file_extent(data->trans, data->root,
718                                        data->objectid, data->inode, file_pos,
719                                        disk_bytenr, num_bytes);
720
721         if (ret || !data->checksum || disk_bytenr == 0)
722                 return ret;
723
724         return csum_disk_extent(data->trans, data->root, disk_bytenr,
725                                 num_bytes);
726 }
727
/*
 * Account one data block of a file: file block @file_block lives at disk
 * block @disk_block.
 *
 * Blocks are collected in @idata as a run (first_block, disk_block,
 * num_blocks).  The run is cut when the new block sits in a superblock
 * stripe or at/after idata->boundary while the run is non-empty, when it
 * leaves a gap in the file, or when it is not the next block on disk.
 * Cutting records the pending run, records a hole (disk block 0) for any
 * gap in the file, and starts a new run at the current block with a newly
 * computed boundary.
 *
 * Returns 0 on success or the error from record_file_blocks().
 */
static int block_iterate_proc(u64 disk_block, u64 file_block,
			      struct blk_iterate_data *idata)
{
	int ret = 0;
	int sb_region;
	int do_barrier;
	struct btrfs_root *root = idata->root;
	struct btrfs_block_group_cache *cache;
	u64 bytenr = disk_block * root->sectorsize;

	sb_region = intersect_with_sb(bytenr, root->sectorsize);
	do_barrier = sb_region || disk_block >= idata->boundary;
	if ((idata->num_blocks > 0 && do_barrier) ||
	    (file_block > idata->first_block + idata->num_blocks) ||
	    (disk_block != idata->disk_block + idata->num_blocks)) {
		/* Flush the run collected so far */
		if (idata->num_blocks > 0) {
			ret = record_file_blocks(idata, idata->first_block,
						 idata->disk_block,
						 idata->num_blocks);
			if (ret)
				goto fail;
			idata->first_block += idata->num_blocks;
			idata->num_blocks = 0;
		}
		/* Gap between the end of the run and this block: a hole */
		if (file_block > idata->first_block) {
			ret = record_file_blocks(idata, idata->first_block,
					0, file_block - idata->first_block);
			if (ret)
				goto fail;
		}

		if (sb_region) {
			/* Round bytenr up to a multiple of BTRFS_STRIPE_LEN */
			bytenr += BTRFS_STRIPE_LEN - 1;
			bytenr &= ~((u64)BTRFS_STRIPE_LEN - 1);
		} else {
			/* The new run may extend up to the end of its block group */
			cache = btrfs_lookup_block_group(root->fs_info, bytenr);
			BUG_ON(!cache);
			bytenr = cache->key.objectid + cache->key.offset;
		}

		idata->first_block = file_block;
		idata->disk_block = disk_block;
		idata->boundary = bytenr / root->sectorsize;
	}
	idata->num_blocks++;
fail:
	return ret;
}
776
777 static int __block_iterate_proc(ext2_filsys fs, blk_t *blocknr,
778                                 e2_blkcnt_t blockcnt, blk_t ref_block,
779                                 int ref_offset, void *priv_data)
780 {
781         int ret;
782         struct blk_iterate_data *idata;
783         idata = (struct blk_iterate_data *)priv_data;
784         ret = block_iterate_proc(*blocknr, blockcnt, idata);
785         if (ret) {
786                 idata->errcode = ret;
787                 return BLOCK_ABORT;
788         }
789         return 0;
790 }
791
/*
 * traverse file's data blocks, record these data blocks as file extents.
 *
 * The blocks of ext2 inode @ext2_ino are walked with
 * ext2fs_block_iterate2(); block_iterate_proc() records every completed
 * run and leaves the last one pending in the iteration data.  That last run
 * is stored as an inline extent when @packing is set, it starts at file
 * block 0 and the inode size fits BTRFS_MAX_INLINE_DATA_SIZE(); otherwise
 * it is recorded as a regular extent.  Finally, if the inode size reaches
 * beyond the last recorded block, the remainder is recorded as a hole.
 *
 * Returns 0 on success, -1 when the block iteration itself fails, -ENOMEM
 * when the inline buffer cannot be allocated, or the error of the failing
 * helper.
 */
static int create_file_extents(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root, u64 objectid,
			       struct btrfs_inode_item *btrfs_inode,
			       ext2_filsys ext2_fs, ext2_ino_t ext2_ino,
			       int datacsum, int packing)
{
	int ret;
	char *buffer = NULL;
	errcode_t err;
	u32 last_block;
	u32 sectorsize = root->sectorsize;
	u64 inode_size = btrfs_stack_inode_size(btrfs_inode);
	struct blk_iterate_data data;

	init_blk_iterate_data(&data, trans, root, btrfs_inode, objectid,
			      datacsum);

	err = ext2fs_block_iterate2(ext2_fs, ext2_ino, BLOCK_FLAG_DATA_ONLY,
				    NULL, __block_iterate_proc, &data);
	if (err)
		goto error;
	/* Error recorded by the callback, if it aborted the walk */
	ret = data.errcode;
	if (ret)
		goto fail;
	if (packing && data.first_block == 0 && data.num_blocks > 0 &&
	    inode_size <= BTRFS_MAX_INLINE_DATA_SIZE(root)) {
		u64 num_bytes = data.num_blocks * sectorsize;
		u64 disk_bytenr = data.disk_block * sectorsize;
		u64 nbytes;

		buffer = malloc(num_bytes);
		if (!buffer)
			return -ENOMEM;
		ret = read_disk_extent(root, disk_bytenr, num_bytes, buffer);
		if (ret)
			goto fail;
		/* Only the bytes that belong to the file are inlined */
		if (num_bytes > inode_size)
			num_bytes = inode_size;
		ret = btrfs_insert_inline_extent(trans, root, objectid,
						 0, buffer, num_bytes);
		if (ret)
			goto fail;
		nbytes = btrfs_stack_inode_nbytes(btrfs_inode) + num_bytes;
		btrfs_set_stack_inode_nbytes(btrfs_inode, nbytes);
	} else if (data.num_blocks > 0) {
		/* Record the run that was still pending after the walk */
		ret = record_file_blocks(&data, data.first_block,
					 data.disk_block, data.num_blocks);
		if (ret)
			goto fail;
	}
	data.first_block += data.num_blocks;
	/* Number of blocks needed to cover inode_size, rounded up */
	last_block = (inode_size + sectorsize - 1) / sectorsize;
	if (last_block > data.first_block) {
		/* Trailing hole: disk block 0 */
		ret = record_file_blocks(&data, data.first_block, 0,
					 last_block - data.first_block);
	}
fail:
	free(buffer);
	return ret;
error:
	fprintf(stderr, "ext2fs_block_iterate2: %s\n", error_message(err));
	return -1;
}
858
859 static int create_symbol_link(struct btrfs_trans_handle *trans,
860                               struct btrfs_root *root, u64 objectid,
861                               struct btrfs_inode_item *btrfs_inode,
862                               ext2_filsys ext2_fs, ext2_ino_t ext2_ino,
863                               struct ext2_inode *ext2_inode)
864 {
865         int ret;
866         char *pathname;
867         u64 inode_size = btrfs_stack_inode_size(btrfs_inode);
868         if (ext2fs_inode_data_blocks(ext2_fs, ext2_inode)) {
869                 btrfs_set_stack_inode_size(btrfs_inode, inode_size + 1);
870                 ret = create_file_extents(trans, root, objectid, btrfs_inode,
871                                           ext2_fs, ext2_ino, 1, 1);
872                 btrfs_set_stack_inode_size(btrfs_inode, inode_size);
873                 return ret;
874         }
875
876         pathname = (char *)&(ext2_inode->i_block[0]);
877         BUG_ON(pathname[inode_size] != 0);
878         ret = btrfs_insert_inline_extent(trans, root, objectid, 0,
879                                          pathname, inode_size + 1);
880         btrfs_set_stack_inode_nbytes(btrfs_inode, inode_size + 1);
881         return ret;
882 }
883
/*
 * The following xattr/ACL-related code is based on the code in
 * fs/ext3/xattr.c and fs/ext3/acl.c.
 */
/* View the start of an xattr block buffer as its header. */
#define EXT2_XATTR_BHDR(ptr) ((struct ext2_ext_attr_header *)(ptr))
/* First entry of an xattr block: it directly follows the full header. */
#define EXT2_XATTR_BFIRST(ptr) \
        ((struct ext2_ext_attr_entry *)(EXT2_XATTR_BHDR(ptr) + 1))
/*
 * Header of the in-inode xattr area: located EXT2_GOOD_OLD_INODE_SIZE plus
 * i_extra_isize bytes into the inode.  Relies on arithmetic on void
 * pointers.
 */
#define EXT2_XATTR_IHDR(inode) \
        ((struct ext2_ext_attr_header *) ((void *)(inode) + \
                EXT2_GOOD_OLD_INODE_SIZE + (inode)->i_extra_isize))
/*
 * First in-inode entry: it follows only the h_magic field, not a complete
 * header.
 */
#define EXT2_XATTR_IFIRST(inode) \
        ((struct ext2_ext_attr_entry *) ((void *)EXT2_XATTR_IHDR(inode) + \
                sizeof(EXT2_XATTR_IHDR(inode)->h_magic)))
897
/*
 * Walk an xattr entry list starting at @entry until the terminating entry.
 * Returns -EIO as soon as a successor entry would start at or beyond @end,
 * 0 when the terminator is reached first.
 */
static int ext2_xattr_check_names(struct ext2_ext_attr_entry *entry,
                                  const void *end)
{
        struct ext2_ext_attr_entry *cur;

        for (cur = entry; !EXT2_EXT_IS_LAST_ENTRY(cur); ) {
                struct ext2_ext_attr_entry *following;

                following = EXT2_EXT_ATTR_NEXT(cur);
                if ((void *)following >= end)
                        return -EIO;
                cur = following;
        }
        return 0;
}
911
912 static int ext2_xattr_check_block(const char *buf, size_t size)
913 {
914         int error;
915         struct ext2_ext_attr_header *header = EXT2_XATTR_BHDR(buf);
916
917         if (header->h_magic != EXT2_EXT_ATTR_MAGIC ||
918             header->h_blocks != 1)
919                 return -EIO;
920         error = ext2_xattr_check_names(EXT2_XATTR_BFIRST(buf), buf + size);
921         return error;
922 }
923
924 static int ext2_xattr_check_entry(struct ext2_ext_attr_entry *entry,
925                                   size_t size)
926 {
927         size_t value_size = entry->e_value_size;
928
929         if (entry->e_value_block != 0 || value_size > size ||
930             entry->e_value_offs + value_size > size)
931                 return -EIO;
932         return 0;
933 }
934
/* Value expected in the a_version field of a source (ext2) ACL header. */
#define EXT2_ACL_VERSION        0x0001

/* 23.2.5 acl_tag_t values */

/* Tag values compared against the e_tag field of ACL entries. */
#define ACL_UNDEFINED_TAG       (0x00)
#define ACL_USER_OBJ            (0x01)
#define ACL_USER                (0x02)
#define ACL_GROUP_OBJ           (0x04)
#define ACL_GROUP               (0x08)
#define ACL_MASK                (0x10)
#define ACL_OTHER               (0x20)

/* 23.2.7 ACL qualifier constants */

/* Id written into converted entries whose source form carries no id. */
#define ACL_UNDEFINED_ID        ((id_t)-1)

/*
 * Full-size source ACL entry.  ext2_acl_to_xattr() consumes this form for
 * ACL_USER and ACL_GROUP entries, which carry an id.
 */
typedef struct {
        __le16          e_tag;
        __le16          e_perm;
        __le32          e_id;
} ext2_acl_entry;

/*
 * Short source ACL entry without an id.  ext2_acl_to_xattr() consumes this
 * form for ACL_USER_OBJ, ACL_GROUP_OBJ, ACL_MASK and ACL_OTHER entries.
 */
typedef struct {
        __le16          e_tag;
        __le16          e_perm;
} ext2_acl_entry_short;

/* Header that precedes the entries of a source ACL. */
typedef struct {
        __le32          a_version;
} ext2_acl_header;
965
966 static inline int ext2_acl_count(size_t size)
967 {
968         ssize_t s;
969         size -= sizeof(ext2_acl_header);
970         s = size - 4 * sizeof(ext2_acl_entry_short);
971         if (s < 0) {
972                 if (size % sizeof(ext2_acl_entry_short))
973                         return -1;
974                 return size / sizeof(ext2_acl_entry_short);
975         } else {
976                 if (s % sizeof(ext2_acl_entry))
977                         return -1;
978                 return s / sizeof(ext2_acl_entry) + 4;
979         }
980 }
981
/* Version written into the a_version field of a converted ACL. */
#define ACL_EA_VERSION          0x0002

/* Entry of a converted ACL; unlike the source form, every entry has e_id. */
typedef struct {
        __le16          e_tag;
        __le16          e_perm;
        __le32          e_id;
} acl_ea_entry;

/* Header of a converted ACL; the entries follow it directly in a_entries. */
typedef struct {
        __le32          a_version;
        acl_ea_entry    a_entries[0];
} acl_ea_header;
994
995 static inline size_t acl_ea_size(int count)
996 {
997         return sizeof(acl_ea_header) + count * sizeof(acl_ea_entry);
998 }
999
1000 static int ext2_acl_to_xattr(void *dst, const void *src,
1001                              size_t dst_size, size_t src_size)
1002 {
1003         int i, count;
1004         const void *end = src + src_size;
1005         acl_ea_header *ext_acl = (acl_ea_header *)dst;
1006         acl_ea_entry *dst_entry = ext_acl->a_entries;
1007         ext2_acl_entry *src_entry;
1008
1009         if (src_size < sizeof(ext2_acl_header))
1010                 goto fail;
1011         if (((ext2_acl_header *)src)->a_version !=
1012             cpu_to_le32(EXT2_ACL_VERSION))
1013                 goto fail;
1014         src += sizeof(ext2_acl_header);
1015         count = ext2_acl_count(src_size);
1016         if (count <= 0)
1017                 goto fail;
1018
1019         BUG_ON(dst_size < acl_ea_size(count));
1020         ext_acl->a_version = cpu_to_le32(ACL_EA_VERSION);
1021         for (i = 0; i < count; i++, dst_entry++) {
1022                 src_entry = (ext2_acl_entry *)src;
1023                 if (src + sizeof(ext2_acl_entry_short) > end)
1024                         goto fail;
1025                 dst_entry->e_tag = src_entry->e_tag;
1026                 dst_entry->e_perm = src_entry->e_perm;
1027                 switch (le16_to_cpu(src_entry->e_tag)) {
1028                 case ACL_USER_OBJ:
1029                 case ACL_GROUP_OBJ:
1030                 case ACL_MASK:
1031                 case ACL_OTHER:
1032                         src += sizeof(ext2_acl_entry_short);
1033                         dst_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID);
1034                         break;
1035                 case ACL_USER:
1036                 case ACL_GROUP:
1037                         src += sizeof(ext2_acl_entry);
1038                         if (src > end)
1039                                 goto fail;
1040                         dst_entry->e_id = src_entry->e_id;
1041                         break;
1042                 default:
1043                         goto fail;
1044                 }
1045         }
1046         if (src != end)
1047                 goto fail;
1048         return 0;
1049 fail:
1050         return -EINVAL;
1051 }
1052
/*
 * Xattr name prefixes, indexed by the e_name_index value of an ext2 xattr
 * entry.  Slots without an initializer (0 and 5) are NULL, which
 * copy_single_xattr() rejects with -EOPNOTSUPP.  Slots 2 and 3 hold the
 * names used for the two ACL xattrs, whose values copy_single_xattr()
 * converts with ext2_acl_to_xattr().
 *
 * The table is read-only, so both the array and the strings are const.
 */
static const char * const xattr_prefix_table[] = {
        [1] =   "user.",
        [2] =   "system.posix_acl_access",
        [3] =   "system.posix_acl_default",
        [4] =   "trusted.",
        [6] =   "security.",
};
1060
1061 static int copy_single_xattr(struct btrfs_trans_handle *trans,
1062                              struct btrfs_root *root, u64 objectid,
1063                              struct ext2_ext_attr_entry *entry,
1064                              const void *data, u32 datalen)
1065 {
1066         int ret = 0;
1067         int name_len;
1068         int name_index;
1069         void *databuf = NULL;
1070         char namebuf[XATTR_NAME_MAX + 1];
1071
1072         name_index = entry->e_name_index;
1073         if (name_index >= ARRAY_SIZE(xattr_prefix_table) ||
1074             xattr_prefix_table[name_index] == NULL)
1075                 return -EOPNOTSUPP;
1076         name_len = strlen(xattr_prefix_table[name_index]) +
1077                    entry->e_name_len;
1078         if (name_len >= sizeof(namebuf))
1079                 return -ERANGE;
1080
1081         if (name_index == 2 || name_index == 3) {
1082                 size_t bufsize = acl_ea_size(ext2_acl_count(datalen));
1083                 databuf = malloc(bufsize);
1084                 if (!databuf)
1085                        return -ENOMEM;
1086                 ret = ext2_acl_to_xattr(databuf, data, bufsize, datalen);
1087                 if (ret)
1088                         goto out;
1089                 data = databuf;
1090                 datalen = bufsize;
1091         }
1092         strncpy(namebuf, xattr_prefix_table[name_index], XATTR_NAME_MAX);
1093         strncat(namebuf, EXT2_EXT_ATTR_NAME(entry), entry->e_name_len);
1094         if (name_len + datalen > BTRFS_LEAF_DATA_SIZE(root) -
1095             sizeof(struct btrfs_item) - sizeof(struct btrfs_dir_item)) {
1096                 fprintf(stderr, "skip large xattr on inode %Lu name %.*s\n",
1097                         objectid - INO_OFFSET, name_len, namebuf);
1098                 goto out;
1099         }
1100         ret = btrfs_insert_xattr_item(trans, root, namebuf, name_len,
1101                                       data, datalen, objectid);
1102 out:
1103         free(databuf);
1104         return ret;
1105 }
1106
1107 static int copy_extended_attrs(struct btrfs_trans_handle *trans,
1108                                struct btrfs_root *root, u64 objectid,
1109                                struct btrfs_inode_item *btrfs_inode,
1110                                ext2_filsys ext2_fs, ext2_ino_t ext2_ino)
1111 {
1112         int ret = 0;
1113         int inline_ea = 0;
1114         errcode_t err;
1115         u32 datalen;
1116         u32 block_size = ext2_fs->blocksize;
1117         u32 inode_size = EXT2_INODE_SIZE(ext2_fs->super);
1118         struct ext2_inode_large *ext2_inode;
1119         struct ext2_ext_attr_entry *entry;
1120         void *data;
1121         char *buffer = NULL;
1122         char inode_buf[EXT2_GOOD_OLD_INODE_SIZE];
1123
1124         if (inode_size <= EXT2_GOOD_OLD_INODE_SIZE) {
1125                 ext2_inode = (struct ext2_inode_large *)inode_buf;
1126         } else {
1127                 ext2_inode = (struct ext2_inode_large *)malloc(inode_size);
1128                 if (!ext2_inode)
1129                        return -ENOMEM;
1130         }
1131         err = ext2fs_read_inode_full(ext2_fs, ext2_ino, (void *)ext2_inode,
1132                                      inode_size);
1133         if (err) {
1134                 fprintf(stderr, "ext2fs_read_inode_full: %s\n",
1135                         error_message(err));
1136                 ret = -1;
1137                 goto out;
1138         }
1139
1140         if (ext2_ino > ext2_fs->super->s_first_ino &&
1141             inode_size > EXT2_GOOD_OLD_INODE_SIZE) {
1142                 if (EXT2_GOOD_OLD_INODE_SIZE +
1143                     ext2_inode->i_extra_isize > inode_size) {
1144                         ret = -EIO;
1145                         goto out;
1146                 }
1147                 if (ext2_inode->i_extra_isize != 0 &&
1148                     EXT2_XATTR_IHDR(ext2_inode)->h_magic ==
1149                     EXT2_EXT_ATTR_MAGIC) {
1150                         inline_ea = 1;
1151                 }
1152         }
1153         if (inline_ea) {
1154                 int total;
1155                 void *end = (void *)ext2_inode + inode_size;
1156                 entry = EXT2_XATTR_IFIRST(ext2_inode);
1157                 total = end - (void *)entry;
1158                 ret = ext2_xattr_check_names(entry, end);
1159                 if (ret)
1160                         goto out;
1161                 while (!EXT2_EXT_IS_LAST_ENTRY(entry)) {
1162                         ret = ext2_xattr_check_entry(entry, total);
1163                         if (ret)
1164                                 goto out;
1165                         data = (void *)EXT2_XATTR_IFIRST(ext2_inode) +
1166                                 entry->e_value_offs;
1167                         datalen = entry->e_value_size;
1168                         ret = copy_single_xattr(trans, root, objectid,
1169                                                 entry, data, datalen);
1170                         if (ret)
1171                                 goto out;
1172                         entry = EXT2_EXT_ATTR_NEXT(entry);
1173                 }
1174         }
1175
1176         if (ext2_inode->i_file_acl == 0)
1177                 goto out;
1178
1179         buffer = malloc(block_size);
1180         if (!buffer) {
1181                 ret = -ENOMEM;
1182                 goto out;
1183         }
1184         err = ext2fs_read_ext_attr(ext2_fs, ext2_inode->i_file_acl, buffer);
1185         if (err) {
1186                 fprintf(stderr, "ext2fs_read_ext_attr: %s\n",
1187                         error_message(err));
1188                 ret = -1;
1189                 goto out;
1190         }
1191         ret = ext2_xattr_check_block(buffer, block_size);
1192         if (ret)
1193                 goto out;
1194
1195         entry = EXT2_XATTR_BFIRST(buffer);
1196         while (!EXT2_EXT_IS_LAST_ENTRY(entry)) {
1197                 ret = ext2_xattr_check_entry(entry, block_size);
1198                 if (ret)
1199                         goto out;
1200                 data = buffer + entry->e_value_offs;
1201                 datalen = entry->e_value_size;
1202                 ret = copy_single_xattr(trans, root, objectid,
1203                                         entry, data, datalen);
1204                 if (ret)
1205                         goto out;
1206                 entry = EXT2_EXT_ATTR_NEXT(entry);
1207         }
1208 out:
1209         free(buffer);
1210         if ((void *)ext2_inode != inode_buf)
1211                 free(ext2_inode);
1212         return ret;
1213 }
/* Number of low bits MKDEV() leaves for the minor number. */
#define MINORBITS       20
/* Combine major @ma and minor @mi: major goes above the MINORBITS low bits. */
#define MKDEV(ma, mi)   (((ma) << MINORBITS) | (mi))
1216
1217 static inline dev_t old_decode_dev(u16 val)
1218 {
1219         return MKDEV((val >> 8) & 255, val & 255);
1220 }
1221
1222 static inline dev_t new_decode_dev(u32 dev)
1223 {
1224         unsigned major = (dev & 0xfff00) >> 8;
1225         unsigned minor = (dev & 0xff) | ((dev >> 12) & 0xfff00);
1226         return MKDEV(major, minor);
1227 }
1228
1229 static int copy_inode_item(struct btrfs_inode_item *dst,
1230                            struct ext2_inode *src, u32 blocksize)
1231 {
1232         btrfs_set_stack_inode_generation(dst, 1);
1233         btrfs_set_stack_inode_sequence(dst, 0);
1234         btrfs_set_stack_inode_transid(dst, 1);
1235         btrfs_set_stack_inode_size(dst, src->i_size);
1236         btrfs_set_stack_inode_nbytes(dst, 0);
1237         btrfs_set_stack_inode_block_group(dst, 0);
1238         btrfs_set_stack_inode_nlink(dst, src->i_links_count);
1239         btrfs_set_stack_inode_uid(dst, src->i_uid | (src->i_uid_high << 16));
1240         btrfs_set_stack_inode_gid(dst, src->i_gid | (src->i_gid_high << 16));
1241         btrfs_set_stack_inode_mode(dst, src->i_mode);
1242         btrfs_set_stack_inode_rdev(dst, 0);
1243         btrfs_set_stack_inode_flags(dst, 0);
1244         btrfs_set_stack_timespec_sec(&dst->atime, src->i_atime);
1245         btrfs_set_stack_timespec_nsec(&dst->atime, 0);
1246         btrfs_set_stack_timespec_sec(&dst->ctime, src->i_ctime);
1247         btrfs_set_stack_timespec_nsec(&dst->ctime, 0);
1248         btrfs_set_stack_timespec_sec(&dst->mtime, src->i_mtime);
1249         btrfs_set_stack_timespec_nsec(&dst->mtime, 0);
1250         btrfs_set_stack_timespec_sec(&dst->otime, 0);
1251         btrfs_set_stack_timespec_nsec(&dst->otime, 0);
1252
1253         if (S_ISDIR(src->i_mode)) {
1254                 btrfs_set_stack_inode_size(dst, 0);
1255                 btrfs_set_stack_inode_nlink(dst, 1);
1256         }
1257         if (S_ISREG(src->i_mode)) {
1258                 btrfs_set_stack_inode_size(dst, (u64)src->i_size_high << 32 |
1259                                            (u64)src->i_size);
1260         }
1261         if (!S_ISREG(src->i_mode) && !S_ISDIR(src->i_mode) &&
1262             !S_ISLNK(src->i_mode)) {
1263                 if (src->i_block[0]) {
1264                         btrfs_set_stack_inode_rdev(dst,
1265                                 old_decode_dev(src->i_block[0]));
1266                 } else {
1267                         btrfs_set_stack_inode_rdev(dst,
1268                                 new_decode_dev(src->i_block[1]));
1269                 }
1270         }
1271         memset(&dst->reserved, 0, sizeof(dst->reserved));
1272
1273         return 0;
1274 }
1275
1276 /*
1277  * copy a single inode. do all the required works, such as cloning
1278  * inode item, creating file extents and creating directory entries.
1279  */
1280 static int copy_single_inode(struct btrfs_trans_handle *trans,
1281                              struct btrfs_root *root, u64 objectid,
1282                              ext2_filsys ext2_fs, ext2_ino_t ext2_ino,
1283                              struct ext2_inode *ext2_inode,
1284                              int datacsum, int packing, int noxattr)
1285 {
1286         int ret;
1287         struct btrfs_inode_item btrfs_inode;
1288
1289         if (ext2_inode->i_links_count == 0)
1290                 return 0;
1291
1292         copy_inode_item(&btrfs_inode, ext2_inode, ext2_fs->blocksize);
1293         if (!datacsum && S_ISREG(ext2_inode->i_mode)) {
1294                 u32 flags = btrfs_stack_inode_flags(&btrfs_inode) |
1295                             BTRFS_INODE_NODATASUM;
1296                 btrfs_set_stack_inode_flags(&btrfs_inode, flags);
1297         }
1298
1299         switch (ext2_inode->i_mode & S_IFMT) {
1300         case S_IFREG:
1301                 ret = create_file_extents(trans, root, objectid, &btrfs_inode,
1302                                         ext2_fs, ext2_ino, datacsum, packing);
1303                 break;
1304         case S_IFDIR:
1305                 ret = create_dir_entries(trans, root, objectid, &btrfs_inode,
1306                                          ext2_fs, ext2_ino);
1307                 break;
1308         case S_IFLNK:
1309                 ret = create_symbol_link(trans, root, objectid, &btrfs_inode,
1310                                          ext2_fs, ext2_ino, ext2_inode);
1311                 break;
1312         default:
1313                 ret = 0;
1314                 break;
1315         }
1316         if (ret)
1317                 return ret;
1318
1319         if (!noxattr) {
1320                 ret = copy_extended_attrs(trans, root, objectid, &btrfs_inode,
1321                                           ext2_fs, ext2_ino);
1322                 if (ret)
1323                         return ret;
1324         }
1325         return btrfs_insert_inode(trans, root, objectid, &btrfs_inode);
1326 }
1327
1328 static int copy_disk_extent(struct btrfs_root *root, u64 dst_bytenr,
1329                             u64 src_bytenr, u32 num_bytes)
1330 {
1331         int ret;
1332         char *buffer;
1333         struct btrfs_fs_devices *fs_devs = root->fs_info->fs_devices;
1334
1335         buffer = malloc(num_bytes);
1336         if (!buffer)
1337                 return -ENOMEM;
1338         ret = pread(fs_devs->latest_bdev, buffer, num_bytes, src_bytenr);
1339         if (ret != num_bytes)
1340                 goto fail;
1341         ret = pwrite(fs_devs->latest_bdev, buffer, num_bytes, dst_bytenr);
1342         if (ret != num_bytes)
1343                 goto fail;
1344         ret = 0;
1345 fail:
1346         free(buffer);
1347         if (ret > 0)
1348                 ret = -1;
1349         return ret;
1350 }
1351 /*
1352  * scan ext2's inode bitmap and copy all used inodes.
1353  */
1354 static int ext2_copy_inodes(struct btrfs_convert_context *cctx,
1355                             struct btrfs_root *root,
1356                             int datacsum, int packing, int noxattr, struct task_ctx *p)
1357 {
1358         ext2_filsys ext2_fs = cctx->fs_data;
1359         int ret;
1360         errcode_t err;
1361         ext2_inode_scan ext2_scan;
1362         struct ext2_inode ext2_inode;
1363         ext2_ino_t ext2_ino;
1364         u64 objectid;
1365         struct btrfs_trans_handle *trans;
1366
1367         trans = btrfs_start_transaction(root, 1);
1368         if (!trans)
1369                 return -ENOMEM;
1370         err = ext2fs_open_inode_scan(ext2_fs, 0, &ext2_scan);
1371         if (err) {
1372                 fprintf(stderr, "ext2fs_open_inode_scan: %s\n", error_message(err));
1373                 return -1;
1374         }
1375         while (!(err = ext2fs_get_next_inode(ext2_scan, &ext2_ino,
1376                                              &ext2_inode))) {
1377                 /* no more inodes */
1378                 if (ext2_ino == 0)
1379                         break;
1380                 /* skip special inode in ext2fs */
1381                 if (ext2_ino < EXT2_GOOD_OLD_FIRST_INO &&
1382                     ext2_ino != EXT2_ROOT_INO)
1383                         continue;
1384                 objectid = ext2_ino + INO_OFFSET;
1385                 ret = copy_single_inode(trans, root,
1386                                         objectid, ext2_fs, ext2_ino,
1387                                         &ext2_inode, datacsum, packing,
1388                                         noxattr);
1389                 p->cur_copy_inodes++;
1390                 if (ret)
1391                         return ret;
1392                 if (trans->blocks_used >= 4096) {
1393                         ret = btrfs_commit_transaction(trans, root);
1394                         BUG_ON(ret);
1395                         trans = btrfs_start_transaction(root, 1);
1396                         BUG_ON(!trans);
1397                 }
1398         }
1399         if (err) {
1400                 fprintf(stderr, "ext2fs_get_next_inode: %s\n", error_message(err));
1401                 return -1;
1402         }
1403         ret = btrfs_commit_transaction(trans, root);
1404         BUG_ON(ret);
1405         ext2fs_close_inode_scan(ext2_scan);
1406
1407         return ret;
1408 }
1409
1410 static int ext2_test_block(struct btrfs_convert_context *cctx, u64 block)
1411 {
1412         ext2_filsys ext2_fs = cctx->fs_data;
1413
1414         BUG_ON(block != (u32)block);
1415         return ext2fs_fast_test_block_bitmap(ext2_fs->block_map, block);
1416 }
1417
1418 /*
1419  * Construct a range of ext2fs image file.
1420  * scan block allocation bitmap, find all blocks used by the ext2fs
1421  * in this range and create file extents that point to these blocks.
1422  *
1423  * Note: Before calling the function, no file extent points to blocks
1424  *       in this range
1425  */
1426 static int create_image_file_range(struct btrfs_trans_handle *trans,
1427                                    struct btrfs_root *root, u64 objectid,
1428                                    struct btrfs_inode_item *inode,
1429                                    u64 start_byte, u64 end_byte,
1430                                    struct btrfs_convert_context *cctx, int datacsum)
1431 {
1432         u32 blocksize = cctx->blocksize;
1433         u32 block = start_byte / blocksize;
1434         u32 last_block = (end_byte + blocksize - 1) / blocksize;
1435         int ret = 0;
1436         struct blk_iterate_data data;
1437
1438         init_blk_iterate_data(&data, trans, root, inode, objectid, datacsum);
1439         data.first_block = block;
1440
1441         for (; start_byte < end_byte; block++, start_byte += blocksize) {
1442                 if (!convert_test_block(cctx, block))
1443                         continue;
1444                 ret = block_iterate_proc(block, block, &data);
1445                 if (ret < 0)
1446                         goto fail;
1447         }
1448         if (data.num_blocks > 0) {
1449                 ret = record_file_blocks(&data, data.first_block,
1450                                          data.disk_block, data.num_blocks);
1451                 if (ret)
1452                         goto fail;
1453                 data.first_block += data.num_blocks;
1454         }
1455         if (last_block > data.first_block) {
1456                 ret = record_file_blocks(&data, data.first_block, 0,
1457                                          last_block - data.first_block);
1458                 if (ret)
1459                         goto fail;
1460         }
1461 fail:
1462         return ret;
1463 }
1464 /*
1465  * Create the fs image file.
1466  */
1467 static int create_image(struct btrfs_convert_context *cctx,
1468                         struct btrfs_root *root, const char *name, int datacsum)
1469 {
1470         int ret;
1471         struct btrfs_key key;
1472         struct btrfs_key location;
1473         struct btrfs_path path;
1474         struct btrfs_inode_item btrfs_inode;
1475         struct btrfs_inode_item *inode_item;
1476         struct extent_buffer *leaf;
1477         struct btrfs_fs_info *fs_info = root->fs_info;
1478         struct btrfs_root *extent_root = fs_info->extent_root;
1479         struct btrfs_trans_handle *trans;
1480         struct btrfs_extent_item *ei;
1481         struct btrfs_extent_inline_ref *iref;
1482         struct btrfs_extent_data_ref *dref;
1483         u64 bytenr;
1484         u64 num_bytes;
1485         u64 objectid;
1486         u64 last_byte;
1487         u64 first_free;
1488         u64 total_bytes;
1489         u64 flags = BTRFS_INODE_READONLY;
1490         u32 sectorsize = root->sectorsize;
1491
1492         total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
1493         first_free =  BTRFS_SUPER_INFO_OFFSET + sectorsize * 2 - 1;
1494         first_free &= ~((u64)sectorsize - 1);
1495         if (!datacsum)
1496                 flags |= BTRFS_INODE_NODATASUM;
1497
1498         memset(&btrfs_inode, 0, sizeof(btrfs_inode));
1499         btrfs_set_stack_inode_generation(&btrfs_inode, 1);
1500         btrfs_set_stack_inode_size(&btrfs_inode, total_bytes);
1501         btrfs_set_stack_inode_nlink(&btrfs_inode, 1);
1502         btrfs_set_stack_inode_nbytes(&btrfs_inode, 0);
1503         btrfs_set_stack_inode_mode(&btrfs_inode, S_IFREG | 0400);
1504         btrfs_set_stack_inode_flags(&btrfs_inode,  flags);
1505         btrfs_init_path(&path);
1506         trans = btrfs_start_transaction(root, 1);
1507         BUG_ON(!trans);
1508
1509         objectid = btrfs_root_dirid(&root->root_item);
1510         ret = btrfs_find_free_objectid(trans, root, objectid, &objectid);
1511         if (ret)
1512                 goto fail;
1513
1514         /*
1515          * copy blocks covered by extent #0 to new positions. extent #0 is
1516          * special, we can't rely on relocate_extents_range to relocate it.
1517          */
1518         for (last_byte = 0; last_byte < first_free; last_byte += sectorsize) {
1519                 ret = custom_alloc_extent(root, sectorsize, 0, &key, 0);
1520                 if (ret)
1521                         goto fail;
1522                 ret = copy_disk_extent(root, key.objectid, last_byte,
1523                                        sectorsize);
1524                 if (ret)
1525                         goto fail;
1526                 ret = btrfs_record_file_extent(trans, root, objectid,
1527                                                &btrfs_inode, last_byte,
1528                                                key.objectid, sectorsize);
1529                 if (ret)
1530                         goto fail;
1531                 if (datacsum) {
1532                         ret = csum_disk_extent(trans, root, key.objectid,
1533                                                sectorsize);
1534                         if (ret)
1535                                 goto fail;
1536                 }
1537         }
1538
1539         while(1) {
1540                 key.objectid = last_byte;
1541                 key.offset = 0;
1542                 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1543                 ret = btrfs_search_slot(trans, fs_info->extent_root,
1544                                         &key, &path, 0, 0);
1545                 if (ret < 0)
1546                         goto fail;
1547 next:
1548                 leaf = path.nodes[0];
1549                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1550                         ret = btrfs_next_leaf(extent_root, &path);
1551                         if (ret < 0)
1552                                 goto fail;
1553                         if (ret > 0)
1554                                 break;
1555                         leaf = path.nodes[0];
1556                 }
1557                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1558                 if (last_byte > key.objectid ||
1559                     key.type != BTRFS_EXTENT_ITEM_KEY) {
1560                         path.slots[0]++;
1561                         goto next;
1562                 }
1563
1564                 bytenr = key.objectid;
1565                 num_bytes = key.offset;
1566                 ei = btrfs_item_ptr(leaf, path.slots[0],
1567                                     struct btrfs_extent_item);
1568                 if (!(btrfs_extent_flags(leaf, ei) & BTRFS_EXTENT_FLAG_DATA)) {
1569                         path.slots[0]++;
1570                         goto next;
1571                 }
1572
1573                 BUG_ON(btrfs_item_size_nr(leaf, path.slots[0]) != sizeof(*ei) +
1574                        btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY));
1575
1576                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
1577                 key.type = btrfs_extent_inline_ref_type(leaf, iref);
1578                 BUG_ON(key.type != BTRFS_EXTENT_DATA_REF_KEY);
1579                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1580                 if (btrfs_extent_data_ref_root(leaf, dref) !=
1581                     BTRFS_FS_TREE_OBJECTID) {
1582                         path.slots[0]++;
1583                         goto next;
1584                 }
1585
1586                 if (bytenr > last_byte) {
1587                         ret = create_image_file_range(trans, root, objectid,
1588                                                       &btrfs_inode, last_byte,
1589                                                       bytenr, cctx,
1590                                                       datacsum);
1591                         if (ret)
1592                                 goto fail;
1593                 }
1594                 ret = btrfs_record_file_extent(trans, root, objectid,
1595                                                &btrfs_inode, bytenr, bytenr,
1596                                                num_bytes);
1597                 if (ret)
1598                         goto fail;
1599                 last_byte = bytenr + num_bytes;
1600                 btrfs_release_path(&path);
1601
1602                 if (trans->blocks_used >= 4096) {
1603                         ret = btrfs_commit_transaction(trans, root);
1604                         BUG_ON(ret);
1605                         trans = btrfs_start_transaction(root, 1);
1606                         BUG_ON(!trans);
1607                 }
1608         }
1609         btrfs_release_path(&path);
1610         if (total_bytes > last_byte) {
1611                 ret = create_image_file_range(trans, root, objectid,
1612                                               &btrfs_inode, last_byte,
1613                                               total_bytes, cctx,
1614                                               datacsum);
1615                 if (ret)
1616                         goto fail;
1617         }
1618
1619         ret = btrfs_insert_inode(trans, root, objectid, &btrfs_inode);
1620         if (ret)
1621                 goto fail;
1622
1623         location.objectid = objectid;
1624         location.offset = 0;
1625         btrfs_set_key_type(&location, BTRFS_INODE_ITEM_KEY);
1626         ret = btrfs_insert_dir_item(trans, root, name, strlen(name),
1627                                     btrfs_root_dirid(&root->root_item),
1628                                     &location, BTRFS_FT_REG_FILE, objectid);
1629         if (ret)
1630                 goto fail;
1631         ret = btrfs_insert_inode_ref(trans, root, name, strlen(name),
1632                                      objectid,
1633                                      btrfs_root_dirid(&root->root_item),
1634                                      objectid);
1635         if (ret)
1636                 goto fail;
1637         location.objectid = btrfs_root_dirid(&root->root_item);
1638         location.offset = 0;
1639         btrfs_set_key_type(&location, BTRFS_INODE_ITEM_KEY);
1640         ret = btrfs_lookup_inode(trans, root, &path, &location, 1);
1641         if (ret)
1642                 goto fail;
1643         leaf = path.nodes[0];
1644         inode_item = btrfs_item_ptr(leaf, path.slots[0],
1645                                     struct btrfs_inode_item);
1646         btrfs_set_inode_size(leaf, inode_item, strlen(name) * 2 +
1647                              btrfs_inode_size(leaf, inode_item));
1648         btrfs_mark_buffer_dirty(leaf);
1649         btrfs_release_path(&path);
1650         ret = btrfs_commit_transaction(trans, root);
1651         BUG_ON(ret);
1652 fail:
1653         btrfs_release_path(&path);
1654         return ret;
1655 }
1656
1657 static struct btrfs_root * link_subvol(struct btrfs_root *root,
1658                 const char *base, u64 root_objectid)
1659 {
1660         struct btrfs_trans_handle *trans;
1661         struct btrfs_fs_info *fs_info = root->fs_info;
1662         struct btrfs_root *tree_root = fs_info->tree_root;
1663         struct btrfs_root *new_root = NULL;
1664         struct btrfs_path *path;
1665         struct btrfs_inode_item *inode_item;
1666         struct extent_buffer *leaf;
1667         struct btrfs_key key;
1668         u64 dirid = btrfs_root_dirid(&root->root_item);
1669         u64 index = 2;
1670         char buf[BTRFS_NAME_LEN + 1]; /* for snprintf null */
1671         int len;
1672         int i;
1673         int ret;
1674
1675         len = strlen(base);
1676         if (len == 0 || len > BTRFS_NAME_LEN)
1677                 return NULL;
1678
1679         path = btrfs_alloc_path();
1680         BUG_ON(!path);
1681
1682         key.objectid = dirid;
1683         key.type = BTRFS_DIR_INDEX_KEY;
1684         key.offset = (u64)-1;
1685
1686         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1687         BUG_ON(ret <= 0);
1688
1689         if (path->slots[0] > 0) {
1690                 path->slots[0]--;
1691                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1692                 if (key.objectid == dirid && key.type == BTRFS_DIR_INDEX_KEY)
1693                         index = key.offset + 1;
1694         }
1695         btrfs_release_path(path);
1696
1697         trans = btrfs_start_transaction(root, 1);
1698         BUG_ON(!trans);
1699
1700         key.objectid = dirid;
1701         key.offset = 0;
1702         key.type =  BTRFS_INODE_ITEM_KEY;
1703
1704         ret = btrfs_lookup_inode(trans, root, path, &key, 1);
1705         BUG_ON(ret);
1706         leaf = path->nodes[0];
1707         inode_item = btrfs_item_ptr(leaf, path->slots[0],
1708                                     struct btrfs_inode_item);
1709
1710         key.objectid = root_objectid;
1711         key.offset = (u64)-1;
1712         key.type = BTRFS_ROOT_ITEM_KEY;
1713
1714         memcpy(buf, base, len);
1715         for (i = 0; i < 1024; i++) {
1716                 ret = btrfs_insert_dir_item(trans, root, buf, len,
1717                                             dirid, &key, BTRFS_FT_DIR, index);
1718                 if (ret != -EEXIST)
1719                         break;
1720                 len = snprintf(buf, ARRAY_SIZE(buf), "%s%d", base, i);
1721                 if (len < 1 || len > BTRFS_NAME_LEN) {
1722                         ret = -EINVAL;
1723                         break;
1724                 }
1725         }
1726         if (ret)
1727                 goto fail;
1728
1729         btrfs_set_inode_size(leaf, inode_item, len * 2 +
1730                              btrfs_inode_size(leaf, inode_item));
1731         btrfs_mark_buffer_dirty(leaf);
1732         btrfs_release_path(path);
1733
1734         /* add the backref first */
1735         ret = btrfs_add_root_ref(trans, tree_root, root_objectid,
1736                                  BTRFS_ROOT_BACKREF_KEY,
1737                                  root->root_key.objectid,
1738                                  dirid, index, buf, len);
1739         BUG_ON(ret);
1740
1741         /* now add the forward ref */
1742         ret = btrfs_add_root_ref(trans, tree_root, root->root_key.objectid,
1743                                  BTRFS_ROOT_REF_KEY, root_objectid,
1744                                  dirid, index, buf, len);
1745
1746         ret = btrfs_commit_transaction(trans, root);
1747         BUG_ON(ret);
1748
1749         new_root = btrfs_read_fs_root(fs_info, &key);
1750         if (IS_ERR(new_root))
1751                 new_root = NULL;
1752 fail:
1753         btrfs_free_path(path);
1754         return new_root;
1755 }
1756
1757 static int create_chunk_mapping(struct btrfs_trans_handle *trans,
1758                                 struct btrfs_root *root)
1759 {
1760         struct btrfs_fs_info *info = root->fs_info;
1761         struct btrfs_root *chunk_root = info->chunk_root;
1762         struct btrfs_root *extent_root = info->extent_root;
1763         struct btrfs_device *device;
1764         struct btrfs_block_group_cache *cache;
1765         struct btrfs_dev_extent *extent;
1766         struct extent_buffer *leaf;
1767         struct btrfs_chunk chunk;
1768         struct btrfs_key key;
1769         struct btrfs_path path;
1770         u64 cur_start;
1771         u64 total_bytes;
1772         u64 chunk_objectid;
1773         int ret;
1774
1775         btrfs_init_path(&path);
1776
1777         total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
1778         chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
1779
1780         BUG_ON(list_empty(&info->fs_devices->devices));
1781         device = list_entry(info->fs_devices->devices.next,
1782                             struct btrfs_device, dev_list);
1783         BUG_ON(device->devid != info->fs_devices->latest_devid);
1784
1785         /* delete device extent created by make_btrfs */
1786         key.objectid = device->devid;
1787         key.offset = 0;
1788         key.type = BTRFS_DEV_EXTENT_KEY;
1789         ret = btrfs_search_slot(trans, device->dev_root, &key, &path, -1, 1);
1790         if (ret < 0)
1791                 goto err;
1792
1793         BUG_ON(ret > 0);
1794         ret = btrfs_del_item(trans, device->dev_root, &path);
1795         if (ret)
1796                 goto err;
1797         btrfs_release_path(&path);
1798
1799         /* delete chunk item created by make_btrfs */
1800         key.objectid = chunk_objectid;
1801         key.offset = 0;
1802         key.type = BTRFS_CHUNK_ITEM_KEY;
1803         ret = btrfs_search_slot(trans, chunk_root, &key, &path, -1, 1);
1804         if (ret < 0)
1805                 goto err;
1806
1807         BUG_ON(ret > 0);
1808         ret = btrfs_del_item(trans, chunk_root, &path);
1809         if (ret)
1810                 goto err;
1811         btrfs_release_path(&path);
1812
1813         /* for each block group, create device extent and chunk item */
1814         cur_start = 0;
1815         while (cur_start < total_bytes) {
1816                 cache = btrfs_lookup_block_group(root->fs_info, cur_start);
1817                 BUG_ON(!cache);
1818
1819                 /* insert device extent */
1820                 key.objectid = device->devid;
1821                 key.offset = cache->key.objectid;
1822                 key.type = BTRFS_DEV_EXTENT_KEY;
1823                 ret = btrfs_insert_empty_item(trans, device->dev_root, &path,
1824                                               &key, sizeof(*extent));
1825                 if (ret)
1826                         goto err;
1827
1828                 leaf = path.nodes[0];
1829                 extent = btrfs_item_ptr(leaf, path.slots[0],
1830                                         struct btrfs_dev_extent);
1831
1832                 btrfs_set_dev_extent_chunk_tree(leaf, extent,
1833                                                 chunk_root->root_key.objectid);
1834                 btrfs_set_dev_extent_chunk_objectid(leaf, extent,
1835                                                     chunk_objectid);
1836                 btrfs_set_dev_extent_chunk_offset(leaf, extent,
1837                                                   cache->key.objectid);
1838                 btrfs_set_dev_extent_length(leaf, extent, cache->key.offset);
1839                 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
1840                     (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
1841                     BTRFS_UUID_SIZE);
1842                 btrfs_mark_buffer_dirty(leaf);
1843                 btrfs_release_path(&path);
1844
1845                 /* insert chunk item */
1846                 btrfs_set_stack_chunk_length(&chunk, cache->key.offset);
1847                 btrfs_set_stack_chunk_owner(&chunk,
1848                                             extent_root->root_key.objectid);
1849                 btrfs_set_stack_chunk_stripe_len(&chunk, BTRFS_STRIPE_LEN);
1850                 btrfs_set_stack_chunk_type(&chunk, cache->flags);
1851                 btrfs_set_stack_chunk_io_align(&chunk, device->io_align);
1852                 btrfs_set_stack_chunk_io_width(&chunk, device->io_width);
1853                 btrfs_set_stack_chunk_sector_size(&chunk, device->sector_size);
1854                 btrfs_set_stack_chunk_num_stripes(&chunk, 1);
1855                 btrfs_set_stack_chunk_sub_stripes(&chunk, 0);
1856                 btrfs_set_stack_stripe_devid(&chunk.stripe, device->devid);
1857                 btrfs_set_stack_stripe_offset(&chunk.stripe,
1858                                               cache->key.objectid);
1859                 memcpy(&chunk.stripe.dev_uuid, device->uuid, BTRFS_UUID_SIZE);
1860
1861                 key.objectid = chunk_objectid;
1862                 key.offset = cache->key.objectid;
1863                 key.type = BTRFS_CHUNK_ITEM_KEY;
1864
1865                 ret = btrfs_insert_item(trans, chunk_root, &key, &chunk,
1866                                         btrfs_chunk_item_size(1));
1867                 if (ret)
1868                         goto err;
1869
1870                 cur_start = cache->key.objectid + cache->key.offset;
1871         }
1872
1873         device->bytes_used = total_bytes;
1874         ret = btrfs_update_device(trans, device);
1875 err:
1876         btrfs_release_path(&path);
1877         return ret;
1878 }
1879
1880 static int create_subvol(struct btrfs_trans_handle *trans,
1881                          struct btrfs_root *root, u64 root_objectid)
1882 {
1883         struct extent_buffer *tmp;
1884         struct btrfs_root *new_root;
1885         struct btrfs_key key;
1886         struct btrfs_root_item root_item;
1887         int ret;
1888
1889         ret = btrfs_copy_root(trans, root, root->node, &tmp,
1890                               root_objectid);
1891         BUG_ON(ret);
1892
1893         memcpy(&root_item, &root->root_item, sizeof(root_item));
1894         btrfs_set_root_bytenr(&root_item, tmp->start);
1895         btrfs_set_root_level(&root_item, btrfs_header_level(tmp));
1896         btrfs_set_root_generation(&root_item, trans->transid);
1897         free_extent_buffer(tmp);
1898
1899         key.objectid = root_objectid;
1900         key.type = BTRFS_ROOT_ITEM_KEY;
1901         key.offset = trans->transid;
1902         ret = btrfs_insert_root(trans, root->fs_info->tree_root,
1903                                 &key, &root_item);
1904
1905         key.offset = (u64)-1;
1906         new_root = btrfs_read_fs_root(root->fs_info, &key);
1907         BUG_ON(!new_root || IS_ERR(new_root));
1908
1909         ret = btrfs_make_root_dir(trans, new_root, BTRFS_FIRST_FREE_OBJECTID);
1910         BUG_ON(ret);
1911
1912         return 0;
1913 }
1914
1915 static int init_btrfs(struct btrfs_root *root)
1916 {
1917         int ret;
1918         struct btrfs_key location;
1919         struct btrfs_trans_handle *trans;
1920         struct btrfs_fs_info *fs_info = root->fs_info;
1921         struct extent_buffer *tmp;
1922
1923         trans = btrfs_start_transaction(root, 1);
1924         BUG_ON(!trans);
1925         ret = btrfs_make_block_groups(trans, root);
1926         if (ret)
1927                 goto err;
1928         ret = btrfs_fix_block_accounting(trans, root);
1929         if (ret)
1930                 goto err;
1931         ret = create_chunk_mapping(trans, root);
1932         if (ret)
1933                 goto err;
1934         ret = btrfs_make_root_dir(trans, fs_info->tree_root,
1935                                   BTRFS_ROOT_TREE_DIR_OBJECTID);
1936         if (ret)
1937                 goto err;
1938         memcpy(&location, &root->root_key, sizeof(location));
1939         location.offset = (u64)-1;
1940         ret = btrfs_insert_dir_item(trans, fs_info->tree_root, "default", 7,
1941                                 btrfs_super_root_dir(fs_info->super_copy),
1942                                 &location, BTRFS_FT_DIR, 0);
1943         if (ret)
1944                 goto err;
1945         ret = btrfs_insert_inode_ref(trans, fs_info->tree_root, "default", 7,
1946                                 location.objectid,
1947                                 btrfs_super_root_dir(fs_info->super_copy), 0);
1948         if (ret)
1949                 goto err;
1950         btrfs_set_root_dirid(&fs_info->fs_root->root_item,
1951                              BTRFS_FIRST_FREE_OBJECTID);
1952
1953         /* subvol for fs image file */
1954         ret = create_subvol(trans, root, CONV_IMAGE_SUBVOL_OBJECTID);
1955         BUG_ON(ret);
1956         /* subvol for data relocation */
1957         ret = create_subvol(trans, root, BTRFS_DATA_RELOC_TREE_OBJECTID);
1958         BUG_ON(ret);
1959
1960         extent_buffer_get(fs_info->csum_root->node);
1961         ret = __btrfs_cow_block(trans, fs_info->csum_root,
1962                                 fs_info->csum_root->node, NULL, 0, &tmp, 0, 0);
1963         BUG_ON(ret);
1964         free_extent_buffer(tmp);
1965
1966         ret = btrfs_commit_transaction(trans, root);
1967         BUG_ON(ret);
1968 err:
1969         return ret;
1970 }
1971
1972 /*
1973  * Migrate super block to its default position and zero 0 ~ 16k
1974  */
1975 static int migrate_super_block(int fd, u64 old_bytenr, u32 sectorsize)
1976 {
1977         int ret;
1978         struct extent_buffer *buf;
1979         struct btrfs_super_block *super;
1980         u32 len;
1981         u32 bytenr;
1982
1983         BUG_ON(sectorsize < sizeof(*super));
1984         buf = malloc(sizeof(*buf) + sectorsize);
1985         if (!buf)
1986                 return -ENOMEM;
1987
1988         buf->len = sectorsize;
1989         ret = pread(fd, buf->data, sectorsize, old_bytenr);
1990         if (ret != sectorsize)
1991                 goto fail;
1992
1993         super = (struct btrfs_super_block *)buf->data;
1994         BUG_ON(btrfs_super_bytenr(super) != old_bytenr);
1995         btrfs_set_super_bytenr(super, BTRFS_SUPER_INFO_OFFSET);
1996
1997         csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
1998         ret = pwrite(fd, buf->data, sectorsize, BTRFS_SUPER_INFO_OFFSET);
1999         if (ret != sectorsize)
2000                 goto fail;
2001
2002         ret = fsync(fd);
2003         if (ret)
2004                 goto fail;
2005
2006         memset(buf->data, 0, sectorsize);
2007         for (bytenr = 0; bytenr < BTRFS_SUPER_INFO_OFFSET; ) {
2008                 len = BTRFS_SUPER_INFO_OFFSET - bytenr;
2009                 if (len > sectorsize)
2010                         len = sectorsize;
2011                 ret = pwrite(fd, buf->data, len, bytenr);
2012                 if (ret != len) {
2013                         fprintf(stderr, "unable to zero fill device\n");
2014                         break;
2015                 }
2016                 bytenr += len;
2017         }
2018         ret = 0;
2019         fsync(fd);
2020 fail:
2021         free(buf);
2022         if (ret > 0)
2023                 ret = -1;
2024         return ret;
2025 }
2026
2027 static int prepare_system_chunk_sb(struct btrfs_super_block *super)
2028 {
2029         struct btrfs_chunk *chunk;
2030         struct btrfs_disk_key *key;
2031         u32 sectorsize = btrfs_super_sectorsize(super);
2032
2033         key = (struct btrfs_disk_key *)(super->sys_chunk_array);
2034         chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
2035                                        sizeof(struct btrfs_disk_key));
2036
2037         btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
2038         btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
2039         btrfs_set_disk_key_offset(key, 0);
2040
2041         btrfs_set_stack_chunk_length(chunk, btrfs_super_total_bytes(super));
2042         btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
2043         btrfs_set_stack_chunk_stripe_len(chunk, BTRFS_STRIPE_LEN);
2044         btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
2045         btrfs_set_stack_chunk_io_align(chunk, sectorsize);
2046         btrfs_set_stack_chunk_io_width(chunk, sectorsize);
2047         btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
2048         btrfs_set_stack_chunk_num_stripes(chunk, 1);
2049         btrfs_set_stack_chunk_sub_stripes(chunk, 0);
2050         chunk->stripe.devid = super->dev_item.devid;
2051         btrfs_set_stack_stripe_offset(&chunk->stripe, 0);
2052         memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
2053         btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
2054         return 0;
2055 }
2056
2057 static int prepare_system_chunk(int fd, u64 sb_bytenr)
2058 {
2059         int ret;
2060         struct extent_buffer *buf;
2061         struct btrfs_super_block *super;
2062
2063         BUG_ON(BTRFS_SUPER_INFO_SIZE < sizeof(*super));
2064         buf = malloc(sizeof(*buf) + BTRFS_SUPER_INFO_SIZE);
2065         if (!buf)
2066                 return -ENOMEM;
2067
2068         buf->len = BTRFS_SUPER_INFO_SIZE;
2069         ret = pread(fd, buf->data, BTRFS_SUPER_INFO_SIZE, sb_bytenr);
2070         if (ret != BTRFS_SUPER_INFO_SIZE)
2071                 goto fail;
2072
2073         super = (struct btrfs_super_block *)buf->data;
2074         BUG_ON(btrfs_super_bytenr(super) != sb_bytenr);
2075         BUG_ON(btrfs_super_num_devices(super) != 1);
2076
2077         ret = prepare_system_chunk_sb(super);
2078         if (ret)
2079                 goto fail;
2080
2081         csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
2082         ret = pwrite(fd, buf->data, BTRFS_SUPER_INFO_SIZE, sb_bytenr);
2083         if (ret != BTRFS_SUPER_INFO_SIZE)
2084                 goto fail;
2085
2086         ret = 0;
2087 fail:
2088         free(buf);
2089         if (ret > 0)
2090                 ret = -1;
2091         return ret;
2092 }
2093
2094 static int relocate_one_reference(struct btrfs_trans_handle *trans,
2095                                   struct btrfs_root *root,
2096                                   u64 extent_start, u64 extent_size,
2097                                   struct btrfs_key *extent_key,
2098                                   struct extent_io_tree *reloc_tree)
2099 {
2100         struct extent_buffer *leaf;
2101         struct btrfs_file_extent_item *fi;
2102         struct btrfs_key key;
2103         struct btrfs_path path;
2104         struct btrfs_inode_item inode;
2105         struct blk_iterate_data data;
2106         u64 bytenr;
2107         u64 num_bytes;
2108         u64 cur_offset;
2109         u64 new_pos;
2110         u64 nbytes;
2111         u64 sector_end;
2112         u32 sectorsize = root->sectorsize;
2113         unsigned long ptr;
2114         int datacsum;
2115         int fd;
2116         int ret;
2117
2118         btrfs_init_path(&path);
2119         ret = btrfs_search_slot(trans, root, extent_key, &path, -1, 1);
2120         if (ret)
2121                 goto fail;
2122
2123         leaf = path.nodes[0];
2124         fi = btrfs_item_ptr(leaf, path.slots[0],
2125                             struct btrfs_file_extent_item);
2126         BUG_ON(btrfs_file_extent_offset(leaf, fi) > 0);
2127         if (extent_start != btrfs_file_extent_disk_bytenr(leaf, fi) ||
2128             extent_size != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
2129                 ret = 1;
2130                 goto fail;
2131         }
2132
2133         bytenr = extent_start + btrfs_file_extent_offset(leaf, fi);
2134         num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
2135
2136         ret = btrfs_del_item(trans, root, &path);
2137         if (ret)
2138                 goto fail;
2139
2140         ret = btrfs_free_extent(trans, root, extent_start, extent_size, 0,
2141                                 root->root_key.objectid,
2142                                 extent_key->objectid, extent_key->offset);
2143         if (ret)
2144                 goto fail;
2145
2146         btrfs_release_path(&path);
2147
2148         key.objectid = extent_key->objectid;
2149         key.offset = 0;
2150         key.type =  BTRFS_INODE_ITEM_KEY;
2151         ret = btrfs_lookup_inode(trans, root, &path, &key, 0);
2152         if (ret)
2153                 goto fail;
2154
2155         leaf = path.nodes[0];
2156         ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
2157         read_extent_buffer(leaf, &inode, ptr, sizeof(inode));
2158         btrfs_release_path(&path);
2159
2160         BUG_ON(num_bytes & (sectorsize - 1));
2161         nbytes = btrfs_stack_inode_nbytes(&inode) - num_bytes;
2162         btrfs_set_stack_inode_nbytes(&inode, nbytes);
2163         datacsum = !(btrfs_stack_inode_flags(&inode) & BTRFS_INODE_NODATASUM);
2164
2165         init_blk_iterate_data(&data, trans, root, &inode, extent_key->objectid,
2166                               datacsum);
2167         data.first_block = extent_key->offset;
2168
2169         cur_offset = extent_key->offset;
2170         while (num_bytes > 0) {
2171                 sector_end = bytenr + sectorsize - 1;
2172                 if (test_range_bit(reloc_tree, bytenr, sector_end,
2173                                    EXTENT_LOCKED, 1)) {
2174                         ret = get_state_private(reloc_tree, bytenr, &new_pos);
2175                         BUG_ON(ret);
2176                 } else {
2177                         ret = custom_alloc_extent(root, sectorsize, 0, &key, 0);
2178                         if (ret)
2179                                 goto fail;
2180                         new_pos = key.objectid;
2181
2182                         if (cur_offset == extent_key->offset) {
2183                                 fd = root->fs_info->fs_devices->latest_bdev;
2184                                 readahead(fd, bytenr, num_bytes);
2185                         }
2186                         ret = copy_disk_extent(root, new_pos, bytenr,
2187                                                sectorsize);
2188                         if (ret)
2189                                 goto fail;
2190                         ret = set_extent_bits(reloc_tree, bytenr, sector_end,
2191                                               EXTENT_LOCKED, GFP_NOFS);
2192                         BUG_ON(ret);
2193                         ret = set_state_private(reloc_tree, bytenr, new_pos);
2194                         BUG_ON(ret);
2195                 }
2196
2197                 ret = block_iterate_proc(new_pos / sectorsize,
2198                                          cur_offset / sectorsize, &data);
2199                 if (ret < 0)
2200                         goto fail;
2201
2202                 cur_offset += sectorsize;
2203                 bytenr += sectorsize;
2204                 num_bytes -= sectorsize;
2205         }
2206
2207         if (data.num_blocks > 0) {
2208                 ret = record_file_blocks(&data, data.first_block,
2209                                          data.disk_block, data.num_blocks);
2210                 if (ret)
2211                         goto fail;
2212         }
2213
2214         key.objectid = extent_key->objectid;
2215         key.offset = 0;
2216         key.type =  BTRFS_INODE_ITEM_KEY;
2217         ret = btrfs_lookup_inode(trans, root, &path, &key, 1);
2218         if (ret)
2219                 goto fail;
2220
2221         leaf = path.nodes[0];
2222         ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
2223         write_extent_buffer(leaf, &inode, ptr, sizeof(inode));
2224         btrfs_mark_buffer_dirty(leaf);
2225         btrfs_release_path(&path);
2226
2227 fail:
2228         btrfs_release_path(&path);
2229         return ret;
2230 }
2231
2232 static int relocate_extents_range(struct btrfs_root *fs_root,
2233                                   struct btrfs_root *image_root,
2234                                   u64 start_byte, u64 end_byte)
2235 {
2236         struct btrfs_fs_info *info = fs_root->fs_info;
2237         struct btrfs_root *extent_root = info->extent_root;
2238         struct btrfs_root *cur_root = NULL;
2239         struct btrfs_trans_handle *trans;
2240         struct btrfs_extent_data_ref *dref;
2241         struct btrfs_extent_inline_ref *iref;
2242         struct btrfs_extent_item *ei;
2243         struct extent_buffer *leaf;
2244         struct btrfs_key key;
2245         struct btrfs_key extent_key;
2246         struct btrfs_path path;
2247         struct extent_io_tree reloc_tree;
2248         unsigned long ptr;
2249         unsigned long end;
2250         u64 cur_byte;
2251         u64 num_bytes;
2252         u64 ref_root;
2253         u64 num_extents;
2254         int pass = 0;
2255         int ret;
2256
2257         btrfs_init_path(&path);
2258         extent_io_tree_init(&reloc_tree);
2259
2260         key.objectid = start_byte;
2261         key.offset = 0;
2262         key.type = BTRFS_EXTENT_ITEM_KEY;
2263         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2264         if (ret < 0)
2265                 goto fail;
2266         if (ret > 0) {
2267                 ret = btrfs_previous_item(extent_root, &path, 0,
2268                                           BTRFS_EXTENT_ITEM_KEY);
2269                 if (ret < 0)
2270                         goto fail;
2271                 if (ret == 0) {
2272                         leaf = path.nodes[0];
2273                         btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2274                         if (key.objectid + key.offset > start_byte)
2275                                 start_byte = key.objectid;
2276                 }
2277         }
2278         btrfs_release_path(&path);
2279 again:
2280         cur_root = (pass % 2 == 0) ? image_root : fs_root;
2281         num_extents = 0;
2282
2283         trans = btrfs_start_transaction(cur_root, 1);
2284         BUG_ON(!trans);
2285
2286         cur_byte = start_byte;
2287         while (1) {
2288                 key.objectid = cur_byte;
2289                 key.offset = 0;
2290                 key.type = BTRFS_EXTENT_ITEM_KEY;
2291                 ret = btrfs_search_slot(trans, extent_root,
2292                                         &key, &path, 0, 0);
2293                 if (ret < 0)
2294                         goto fail;
2295 next:
2296                 leaf = path.nodes[0];
2297                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2298                         ret = btrfs_next_leaf(extent_root, &path);
2299                         if (ret < 0)
2300                                 goto fail;
2301                         if (ret > 0)
2302                                 break;
2303                         leaf = path.nodes[0];
2304                 }
2305
2306                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2307                 if (key.objectid < cur_byte ||
2308                     key.type != BTRFS_EXTENT_ITEM_KEY) {
2309                         path.slots[0]++;
2310                         goto next;
2311                 }
2312                 if (key.objectid >= end_byte)
2313                         break;
2314
2315                 num_extents++;
2316
2317                 cur_byte = key.objectid;
2318                 num_bytes = key.offset;
2319                 ei = btrfs_item_ptr(leaf, path.slots[0],
2320                                     struct btrfs_extent_item);
2321                 BUG_ON(!(btrfs_extent_flags(leaf, ei) &
2322                          BTRFS_EXTENT_FLAG_DATA));
2323
2324                 ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
2325                 end = ptr + btrfs_item_size_nr(leaf, path.slots[0]);
2326
2327                 ptr += sizeof(struct btrfs_extent_item);
2328
2329                 while (ptr < end) {
2330                         iref = (struct btrfs_extent_inline_ref *)ptr;
2331                         key.type = btrfs_extent_inline_ref_type(leaf, iref);
2332                         BUG_ON(key.type != BTRFS_EXTENT_DATA_REF_KEY);
2333                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
2334                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
2335                         extent_key.objectid =
2336                                 btrfs_extent_data_ref_objectid(leaf, dref);
2337                         extent_key.offset =
2338                                 btrfs_extent_data_ref_offset(leaf, dref);
2339                         extent_key.type = BTRFS_EXTENT_DATA_KEY;
2340                         BUG_ON(btrfs_extent_data_ref_count(leaf, dref) != 1);
2341
2342                         if (ref_root == cur_root->root_key.objectid)
2343                                 break;
2344
2345                         ptr += btrfs_extent_inline_ref_size(key.type);
2346                 }
2347
2348                 if (ptr >= end) {
2349                         path.slots[0]++;
2350                         goto next;
2351                 }
2352
2353                 ret = relocate_one_reference(trans, cur_root, cur_byte,
2354                                              num_bytes, &extent_key,
2355                                              &reloc_tree);
2356                 if (ret < 0)
2357                         goto fail;
2358
2359                 cur_byte += num_bytes;
2360                 btrfs_release_path(&path);
2361
2362                 if (trans->blocks_used >= 4096) {
2363                         ret = btrfs_commit_transaction(trans, cur_root);
2364                         BUG_ON(ret);
2365                         trans = btrfs_start_transaction(cur_root, 1);
2366                         BUG_ON(!trans);
2367                 }
2368         }
2369         btrfs_release_path(&path);
2370
2371         ret = btrfs_commit_transaction(trans, cur_root);
2372         BUG_ON(ret);
2373
2374         if (num_extents > 0 && pass++ < 16)
2375                 goto again;
2376
2377         ret = (num_extents > 0) ? -1 : 0;
2378 fail:
2379         btrfs_release_path(&path);
2380         extent_io_tree_cleanup(&reloc_tree);
2381         return ret;
2382 }
2383
2384 /*
2385  * relocate data in system chunk
2386  */
2387 static int cleanup_sys_chunk(struct btrfs_root *fs_root,
2388                              struct btrfs_root *image_root)
2389 {
2390         struct btrfs_block_group_cache *cache;
2391         int i, ret = 0;
2392         u64 offset = 0;
2393         u64 end_byte;
2394
2395         while(1) {
2396                 cache = btrfs_lookup_block_group(fs_root->fs_info, offset);
2397                 if (!cache)
2398                         break;
2399
2400                 end_byte = cache->key.objectid + cache->key.offset;
2401                 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
2402                         ret = relocate_extents_range(fs_root, image_root,
2403                                                      cache->key.objectid,
2404                                                      end_byte);
2405                         if (ret)
2406                                 goto fail;
2407                 }
2408                 offset = end_byte;
2409         }
2410         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2411                 offset = btrfs_sb_offset(i);
2412                 offset &= ~((u64)BTRFS_STRIPE_LEN - 1);
2413
2414                 ret = relocate_extents_range(fs_root, image_root,
2415                                              offset, offset + BTRFS_STRIPE_LEN);
2416                 if (ret)
2417                         goto fail;
2418         }
2419         ret = 0;
2420 fail:
2421         return ret;
2422 }
2423
2424 static int fixup_chunk_mapping(struct btrfs_root *root)
2425 {
2426         struct btrfs_trans_handle *trans;
2427         struct btrfs_fs_info *info = root->fs_info;
2428         struct btrfs_root *chunk_root = info->chunk_root;
2429         struct extent_buffer *leaf;
2430         struct btrfs_key key;
2431         struct btrfs_path path;
2432         struct btrfs_chunk chunk;
2433         unsigned long ptr;
2434         u32 size;
2435         u64 type;
2436         int ret;
2437
2438         btrfs_init_path(&path);
2439
2440         trans = btrfs_start_transaction(root, 1);
2441         BUG_ON(!trans);
2442
2443         /*
2444          * recow the whole chunk tree. this will move all chunk tree blocks
2445          * into system block group.
2446          */
2447         memset(&key, 0, sizeof(key));
2448         while (1) {
2449                 ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 1);
2450                 if (ret < 0)
2451                         goto err;
2452
2453                 ret = btrfs_next_leaf(chunk_root, &path);
2454                 if (ret < 0)
2455                         goto err;
2456                 if (ret > 0)
2457                         break;
2458
2459                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
2460                 btrfs_release_path(&path);
2461         }
2462         btrfs_release_path(&path);
2463
2464         /* fixup the system chunk array in super block */
2465         btrfs_set_super_sys_array_size(info->super_copy, 0);
2466
2467         key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
2468         key.offset = 0;
2469         key.type = BTRFS_CHUNK_ITEM_KEY;
2470
2471         ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 0);
2472         if (ret < 0)
2473                 goto err;
2474         BUG_ON(ret != 0);
2475         while(1) {
2476                 leaf = path.nodes[0];
2477                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2478                         ret = btrfs_next_leaf(chunk_root, &path);
2479                         if (ret < 0)
2480                                 goto err;
2481                         if (ret > 0)
2482                                 break;
2483                         leaf = path.nodes[0];
2484                 }
2485                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2486                 if (key.type != BTRFS_CHUNK_ITEM_KEY)
2487                         goto next;
2488
2489                 ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
2490                 size = btrfs_item_size_nr(leaf, path.slots[0]);
2491                 BUG_ON(size != sizeof(chunk));
2492                 read_extent_buffer(leaf, &chunk, ptr, size);
2493                 type = btrfs_stack_chunk_type(&chunk);
2494
2495                 if (!(type & BTRFS_BLOCK_GROUP_SYSTEM))
2496                         goto next;
2497
2498                 ret = btrfs_add_system_chunk(trans, chunk_root, &key,
2499                                              &chunk, size);
2500                 if (ret)
2501                         goto err;
2502 next:
2503                 path.slots[0]++;
2504         }
2505
2506         ret = btrfs_commit_transaction(trans, root);
2507         BUG_ON(ret);
2508 err:
2509         btrfs_release_path(&path);
2510         return ret;
2511 }
2512
2513 static const struct btrfs_convert_operations ext2_convert_ops = {
2514         .name                   = "ext2",
2515         .open_fs                = ext2_open_fs,
2516         .read_used_space        = ext2_read_used_space,
2517         .alloc_block            = ext2_alloc_block,
2518         .alloc_block_range      = ext2_alloc_block_range,
2519         .copy_inodes            = ext2_copy_inodes,
2520         .test_block             = ext2_test_block,
2521         .free_block             = ext2_free_block,
2522         .free_block_range       = ext2_free_block_range,
2523         .close_fs               = ext2_close_fs,
2524 };
2525
2526 static const struct btrfs_convert_operations *convert_operations[] = {
2527         &ext2_convert_ops,
2528 };
2529
2530 static int convert_open_fs(const char *devname,
2531                            struct btrfs_convert_context *cctx)
2532 {
2533         int i;
2534
2535         memset(cctx, 0, sizeof(*cctx));
2536
2537         for (i = 0; i < ARRAY_SIZE(convert_operations); i++) {
2538                 int ret = convert_operations[i]->open_fs(cctx, devname);
2539
2540                 if (ret == 0) {
2541                         cctx->convert_ops = convert_operations[i];
2542                         return ret;
2543                 }
2544         }
2545
2546         fprintf(stderr, "No file system found to convert.\n");
2547         return -1;
2548 }
2549
2550 /*
2551  * Remove one reserve range from given cache tree
2552  * if min_stripe_size is non-zero, it will ensure for split case,
2553  * all its split cache extent is no smaller than @min_strip_size / 2.
2554  */
2555 static int wipe_one_reserved_range(struct cache_tree *tree,
2556                                    u64 start, u64 len, u64 min_stripe_size,
2557                                    int ensure_size)
2558 {
2559         struct cache_extent *cache;
2560         int ret;
2561
2562         BUG_ON(ensure_size && min_stripe_size == 0);
2563         /*
2564          * The logical here is simplified to handle special cases only
2565          * So we don't need to consider merge case for ensure_size
2566          */
2567         BUG_ON(min_stripe_size && (min_stripe_size < len * 2 ||
2568                min_stripe_size / 2 < BTRFS_STRIPE_LEN));
2569
2570         /* Also, wipe range should already be aligned */
2571         BUG_ON(start != round_down(start, BTRFS_STRIPE_LEN) ||
2572                start + len != round_up(start + len, BTRFS_STRIPE_LEN));
2573
2574         min_stripe_size /= 2;
2575
2576         cache = lookup_cache_extent(tree, start, len);
2577         if (!cache)
2578                 return 0;
2579
2580         if (start <= cache->start) {
2581                 /*
2582                  *      |--------cache---------|
2583                  * |-wipe-|
2584                  */
2585                 BUG_ON(start + len <= cache->start);
2586
2587                 /*
2588                  * The wipe size is smaller than min_stripe_size / 2,
2589                  * so the result length should still meet min_stripe_size
2590                  * And no need to do alignment
2591                  */
2592                 cache->size -= (start + len - cache->start);
2593                 if (cache->size == 0) {
2594                         remove_cache_extent(tree, cache);
2595                         free(cache);
2596                         return 0;
2597                 }
2598
2599                 BUG_ON(ensure_size && cache->size < min_stripe_size);
2600
2601                 cache->start = start + len;
2602                 return 0;
2603         } else if (start > cache->start && start + len < cache->start +
2604                    cache->size) {
2605                 /*
2606                  * |-------cache-----|
2607                  *      |-wipe-|
2608                  */
2609                 u64 old_len = cache->size;
2610                 u64 insert_start = start + len;
2611                 u64 insert_len;
2612
2613                 cache->size = start - cache->start;
2614                 if (ensure_size)
2615                         cache->size = max(cache->size, min_stripe_size);
2616                 cache->start = start - cache->size;
2617
2618                 /* And insert the new one */
2619                 insert_len = old_len - start - len;
2620                 if (ensure_size)
2621                         insert_len = max(insert_len, min_stripe_size);
2622
2623                 ret = add_merge_cache_extent(tree, insert_start, insert_len);
2624                 return ret;
2625         }
2626         /*
2627          * |----cache-----|
2628          *              |--wipe-|
2629          * Wipe len should be small enough and no need to expand the
2630          * remaining extent
2631          */
2632         cache->size = start - cache->start;
2633         BUG_ON(ensure_size && cache->size < min_stripe_size);
2634         return 0;
2635 }
2636
2637 /*
2638  * Remove reserved ranges from given cache_tree
2639  *
2640  * It will remove the following ranges
2641  * 1) 0~1M
2642  * 2) 2nd superblock, +64K (make sure chunks are 64K aligned)
2643  * 3) 3rd superblock, +64K
2644  *
2645  * @min_stripe must be given for safety check
2646  * and if @ensure_size is given, it will ensure affected cache_extent will be
2647  * larger than min_stripe_size
2648  */
2649 static int wipe_reserved_ranges(struct cache_tree *tree, u64 min_stripe_size,
2650                                 int ensure_size)
2651 {
2652         int ret;
2653
2654         ret = wipe_one_reserved_range(tree, 0, 1024 * 1024, min_stripe_size,
2655                                       ensure_size);
2656         if (ret < 0)
2657                 return ret;
2658         ret = wipe_one_reserved_range(tree, btrfs_sb_offset(1),
2659                         BTRFS_STRIPE_LEN, min_stripe_size, ensure_size);
2660         if (ret < 0)
2661                 return ret;
2662         ret = wipe_one_reserved_range(tree, btrfs_sb_offset(2),
2663                         BTRFS_STRIPE_LEN, min_stripe_size, ensure_size);
2664         return ret;
2665 }
2666
2667 static int calculate_available_space(struct btrfs_convert_context *cctx)
2668 {
2669         struct cache_tree *used = &cctx->used;
2670         struct cache_tree *data_chunks = &cctx->data_chunks;
2671         struct cache_tree *free = &cctx->free;
2672         struct cache_extent *cache;
2673         u64 cur_off = 0;
2674         /*
2675          * Twice the minimal chunk size, to allow later wipe_reserved_ranges()
2676          * works without need to consider overlap
2677          */
2678         u64 min_stripe_size = 2 * 16 * 1024 * 1024;
2679         int ret;
2680
2681         /* Calculate data_chunks */
2682         for (cache = first_cache_extent(used); cache;
2683              cache = next_cache_extent(cache)) {
2684                 u64 cur_len;
2685
2686                 if (cache->start + cache->size < cur_off)
2687                         continue;
2688                 if (cache->start > cur_off + min_stripe_size)
2689                         cur_off = cache->start;
2690                 cur_len = max(cache->start + cache->size - cur_off,
2691                               min_stripe_size);
2692                 ret = add_merge_cache_extent(data_chunks, cur_off, cur_len);
2693                 if (ret < 0)
2694                         goto out;
2695                 cur_off += cur_len;
2696         }
2697         /*
2698          * remove reserved ranges, so we won't ever bother relocating an old
2699          * filesystem extent to other place.
2700          */
2701         ret = wipe_reserved_ranges(data_chunks, min_stripe_size, 1);
2702         if (ret < 0)
2703                 goto out;
2704
2705         cur_off = 0;
2706         /*
2707          * Calculate free space
2708          * Always round up the start bytenr, to avoid metadata extent corss
2709          * stripe boundary, as later mkfs_convert() won't have all the extent
2710          * allocation check
2711          */
2712         for (cache = first_cache_extent(data_chunks); cache;
2713              cache = next_cache_extent(cache)) {
2714                 if (cache->start < cur_off)
2715                         continue;
2716                 if (cache->start > cur_off) {
2717                         u64 insert_start;
2718                         u64 len;
2719
2720                         len = cache->start - round_up(cur_off,
2721                                                       BTRFS_STRIPE_LEN);
2722                         insert_start = round_up(cur_off, BTRFS_STRIPE_LEN);
2723
2724                         ret = add_merge_cache_extent(free, insert_start, len);
2725                         if (ret < 0)
2726                                 goto out;
2727                 }
2728                 cur_off = cache->start + cache->size;
2729         }
2730         /* Don't forget the last range */
2731         if (cctx->total_bytes > cur_off) {
2732                 u64 len = cctx->total_bytes - cur_off;
2733                 u64 insert_start;
2734
2735                 insert_start = round_up(cur_off, BTRFS_STRIPE_LEN);
2736
2737                 ret = add_merge_cache_extent(free, insert_start, len);
2738                 if (ret < 0)
2739                         goto out;
2740         }
2741
2742         /* Remove reserved bytes */
2743         ret = wipe_reserved_ranges(free, min_stripe_size, 0);
2744 out:
2745         return ret;
2746 }
2747 /*
2748  * Read used space, and since we have the used space,
2749  * calcuate data_chunks and free for later mkfs
2750  */
2751 static int convert_read_used_space(struct btrfs_convert_context *cctx)
2752 {
2753         int ret;
2754
2755         ret = cctx->convert_ops->read_used_space(cctx);
2756         if (ret)
2757                 return ret;
2758
2759         ret = calculate_available_space(cctx);
2760         return ret;
2761 }
2762
2763 static int do_convert(const char *devname, int datacsum, int packing, int noxattr,
2764                 u32 nodesize, int copylabel, const char *fslabel, int progress,
2765                 u64 features)
2766 {
2767         int i, ret, blocks_per_node;
2768         int fd = -1;
2769         int is_btrfs = 0;
2770         u32 blocksize;
2771         u64 blocks[7];
2772         u64 total_bytes;
2773         u64 super_bytenr;
2774         struct btrfs_root *root;
2775         struct btrfs_root *image_root;
2776         struct btrfs_convert_context cctx;
2777         char *subvol_name = NULL;
2778         struct task_ctx ctx;
2779         char features_buf[64];
2780         struct btrfs_mkfs_config mkfs_cfg;
2781
2782         init_convert_context(&cctx);
2783         ret = convert_open_fs(devname, &cctx);
2784         if (ret)
2785                 goto fail;
2786         ret = convert_read_used_space(&cctx);
2787         if (ret)
2788                 goto fail;
2789
2790         blocksize = cctx.blocksize;
2791         total_bytes = (u64)blocksize * (u64)cctx.block_count;
2792         if (blocksize < 4096) {
2793                 fprintf(stderr, "block size is too small\n");
2794                 goto fail;
2795         }
2796         if (btrfs_check_nodesize(nodesize, blocksize, features))
2797                 goto fail;
2798         blocks_per_node = nodesize / blocksize;
2799         ret = -blocks_per_node;
2800         for (i = 0; i < 7; i++) {
2801                 if (nodesize == blocksize)
2802                         ret = convert_alloc_block(&cctx, 0, blocks + i);
2803                 else
2804                         ret = convert_alloc_block_range(&cctx,
2805                                         ret + blocks_per_node, blocks_per_node,
2806                                         blocks + i);
2807                 if (ret) {
2808                         fprintf(stderr, "not enough free space\n");
2809                         goto fail;
2810                 }
2811                 blocks[i] *= blocksize;
2812         }
2813         super_bytenr = blocks[0];
2814         fd = open(devname, O_RDWR);
2815         if (fd < 0) {
2816                 fprintf(stderr, "unable to open %s\n", devname);
2817                 goto fail;
2818         }
2819         btrfs_parse_features_to_string(features_buf, features);
2820         if (features == BTRFS_MKFS_DEFAULT_FEATURES)
2821                 strcat(features_buf, " (default)");
2822
2823         printf("create btrfs filesystem:\n");
2824         printf("\tblocksize: %u\n", blocksize);
2825         printf("\tnodesize:  %u\n", nodesize);
2826         printf("\tfeatures:  %s\n", features_buf);
2827
2828         mkfs_cfg.label = cctx.volume_name;
2829         mkfs_cfg.fs_uuid = NULL;
2830         memcpy(mkfs_cfg.blocks, blocks, sizeof(blocks));
2831         mkfs_cfg.num_bytes = total_bytes;
2832         mkfs_cfg.nodesize = nodesize;
2833         mkfs_cfg.sectorsize = blocksize;
2834         mkfs_cfg.stripesize = blocksize;
2835         mkfs_cfg.features = features;
2836
2837         ret = make_btrfs(fd, &mkfs_cfg);
2838         if (ret) {
2839                 fprintf(stderr, "unable to create initial ctree: %s\n",
2840                         strerror(-ret));
2841                 goto fail;
2842         }
2843         /* create a system chunk that maps the whole device */
2844         ret = prepare_system_chunk(fd, super_bytenr);
2845         if (ret) {
2846                 fprintf(stderr, "unable to update system chunk\n");
2847                 goto fail;
2848         }
2849         root = open_ctree_fd(fd, devname, super_bytenr, OPEN_CTREE_WRITES);
2850         if (!root) {
2851                 fprintf(stderr, "unable to open ctree\n");
2852                 goto fail;
2853         }
2854         ret = cache_free_extents(root, &cctx);
2855         if (ret) {
2856                 fprintf(stderr, "error during cache_free_extents %d\n", ret);
2857                 goto fail;
2858         }
2859         root->fs_info->extent_ops = &extent_ops;
2860         /* recover block allocation bitmap */
2861         for (i = 0; i < 7; i++) {
2862                 blocks[i] /= blocksize;
2863                 if (nodesize == blocksize)
2864                         convert_free_block(&cctx, blocks[i]);
2865                 else
2866                         convert_free_block_range(&cctx, blocks[i],
2867                                         blocks_per_node);
2868         }
2869         ret = init_btrfs(root);
2870         if (ret) {
2871                 fprintf(stderr, "unable to setup the root tree\n");
2872                 goto fail;
2873         }
2874         printf("creating btrfs metadata.\n");
2875         ctx.max_copy_inodes = (cctx.inodes_count - cctx.free_inodes_count);
2876         ctx.cur_copy_inodes = 0;
2877
2878         if (progress) {
2879                 ctx.info = task_init(print_copied_inodes, after_copied_inodes, &ctx);
2880                 task_start(ctx.info);
2881         }
2882         ret = copy_inodes(&cctx, root, datacsum, packing, noxattr, &ctx);
2883         if (ret) {
2884                 fprintf(stderr, "error during copy_inodes %d\n", ret);
2885                 goto fail;
2886         }
2887         if (progress) {
2888                 task_stop(ctx.info);
2889                 task_deinit(ctx.info);
2890         }
2891
2892         printf("creating %s image file.\n", cctx.convert_ops->name);
2893         ret = asprintf(&subvol_name, "%s_saved", cctx.convert_ops->name);
2894         if (ret < 0) {
2895                 fprintf(stderr, "error allocating subvolume name: %s_saved\n",
2896                         cctx.convert_ops->name);
2897                 goto fail;
2898         }
2899
2900         image_root = link_subvol(root, subvol_name, CONV_IMAGE_SUBVOL_OBJECTID);
2901
2902         free(subvol_name);
2903
2904         if (!image_root) {
2905                 fprintf(stderr, "unable to create subvol\n");
2906                 goto fail;
2907         }
2908         ret = create_image(&cctx, image_root, "image", datacsum);
2909         if (ret) {
2910                 fprintf(stderr, "error during create_image %d\n", ret);
2911                 goto fail;
2912         }
2913         memset(root->fs_info->super_copy->label, 0, BTRFS_LABEL_SIZE);
2914         if (copylabel == 1) {
2915                 __strncpy_null(root->fs_info->super_copy->label,
2916                                 cctx.volume_name, BTRFS_LABEL_SIZE - 1);
2917                 fprintf(stderr, "copy label '%s'\n",
2918                                 root->fs_info->super_copy->label);
2919         } else if (copylabel == -1) {
2920                 strcpy(root->fs_info->super_copy->label, fslabel);
2921                 fprintf(stderr, "set label to '%s'\n", fslabel);
2922         }
2923
2924         printf("cleaning up system chunk.\n");
2925         ret = cleanup_sys_chunk(root, image_root);
2926         if (ret) {
2927                 fprintf(stderr, "error during cleanup_sys_chunk %d\n", ret);
2928                 goto fail;
2929         }
2930         ret = close_ctree(root);
2931         if (ret) {
2932                 fprintf(stderr, "error during close_ctree %d\n", ret);
2933                 goto fail;
2934         }
2935         convert_close_fs(&cctx);
2936         clean_convert_context(&cctx);
2937
2938         /*
2939          * If this step succeed, we get a mountable btrfs. Otherwise
2940          * the source fs is left unchanged.
2941          */
2942         ret = migrate_super_block(fd, super_bytenr, blocksize);
2943         if (ret) {
2944                 fprintf(stderr, "unable to migrate super block\n");
2945                 goto fail;
2946         }
2947         is_btrfs = 1;
2948
2949         root = open_ctree_fd(fd, devname, 0, OPEN_CTREE_WRITES);
2950         if (!root) {
2951                 fprintf(stderr, "unable to open ctree\n");
2952                 goto fail;
2953         }
2954         /* move chunk tree into system chunk. */
2955         ret = fixup_chunk_mapping(root);
2956         if (ret) {
2957                 fprintf(stderr, "error during fixup_chunk_tree\n");
2958                 goto fail;
2959         }
2960         ret = close_ctree(root);
2961         close(fd);
2962
2963         printf("conversion complete.\n");
2964         return 0;
2965 fail:
2966         clean_convert_context(&cctx);
2967         if (fd != -1)
2968                 close(fd);
2969         if (is_btrfs)
2970                 fprintf(stderr,
2971                         "WARNING: an error occured during chunk mapping fixup, filesystem mountable but not finalized\n");
2972         else
2973                 fprintf(stderr, "conversion aborted\n");
2974         return -1;
2975 }
2976
2977 static int may_rollback(struct btrfs_root *root)
2978 {
2979         struct btrfs_fs_info *info = root->fs_info;
2980         struct btrfs_multi_bio *multi = NULL;
2981         u64 bytenr;
2982         u64 length;
2983         u64 physical;
2984         u64 total_bytes;
2985         int num_stripes;
2986         int ret;
2987
2988         if (btrfs_super_num_devices(info->super_copy) != 1)
2989                 goto fail;
2990
2991         bytenr = BTRFS_SUPER_INFO_OFFSET;
2992         total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
2993
2994         while (1) {
2995                 ret = btrfs_map_block(&info->mapping_tree, WRITE, bytenr,
2996                                       &length, &multi, 0, NULL);
2997                 if (ret) {
2998                         if (ret == -ENOENT) {
2999                                 /* removed block group at the tail */
3000                                 if (length == (u64)-1)
3001                                         break;
3002
3003                                 /* removed block group in the middle */
3004                                 goto next;
3005                         }
3006                         goto fail;
3007                 }
3008
3009                 num_stripes = multi->num_stripes;
3010                 physical = multi->stripes[0].physical;
3011                 kfree(multi);
3012
3013                 if (num_stripes != 1 || physical != bytenr)
3014                         goto fail;
3015 next:
3016                 bytenr += length;
3017                 if (bytenr >= total_bytes)
3018                         break;
3019         }
3020         return 0;
3021 fail:
3022         return -1;
3023 }
3024
3025 static int do_rollback(const char *devname)
3026 {
3027         int fd = -1;
3028         int ret;
3029         int i;
3030         struct btrfs_root *root;
3031         struct btrfs_root *image_root;
3032         struct btrfs_root *chunk_root;
3033         struct btrfs_dir_item *dir;
3034         struct btrfs_inode_item *inode;
3035         struct btrfs_file_extent_item *fi;
3036         struct btrfs_trans_handle *trans;
3037         struct extent_buffer *leaf;
3038         struct btrfs_block_group_cache *cache1;
3039         struct btrfs_block_group_cache *cache2;
3040         struct btrfs_key key;
3041         struct btrfs_path path;
3042         struct extent_io_tree io_tree;
3043         char *buf = NULL;
3044         char *name;
3045         u64 bytenr;
3046         u64 num_bytes;
3047         u64 root_dir;
3048         u64 objectid;
3049         u64 offset;
3050         u64 start;
3051         u64 end;
3052         u64 sb_bytenr;
3053         u64 first_free;
3054         u64 total_bytes;
3055         u32 sectorsize;
3056
3057         extent_io_tree_init(&io_tree);
3058
3059         fd = open(devname, O_RDWR);
3060         if (fd < 0) {
3061                 fprintf(stderr, "unable to open %s\n", devname);
3062                 goto fail;
3063         }
3064         root = open_ctree_fd(fd, devname, 0, OPEN_CTREE_WRITES);
3065         if (!root) {
3066                 fprintf(stderr, "unable to open ctree\n");
3067                 goto fail;
3068         }
3069         ret = may_rollback(root);
3070         if (ret < 0) {
3071                 fprintf(stderr, "unable to do rollback\n");
3072                 goto fail;
3073         }
3074
3075         sectorsize = root->sectorsize;
3076         buf = malloc(sectorsize);
3077         if (!buf) {
3078                 fprintf(stderr, "unable to allocate memory\n");
3079                 goto fail;
3080         }
3081
3082         btrfs_init_path(&path);
3083
3084         key.objectid = CONV_IMAGE_SUBVOL_OBJECTID;
3085         key.type = BTRFS_ROOT_BACKREF_KEY;
3086         key.offset = BTRFS_FS_TREE_OBJECTID;
3087         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path, 0,
3088                                 0);
3089         btrfs_release_path(&path);
3090         if (ret > 0) {
3091                 fprintf(stderr,
3092                 "ERROR: unable to convert ext2 image subvolume, is it deleted?\n");
3093                 goto fail;
3094         } else if (ret < 0) {
3095                 fprintf(stderr,
3096                         "ERROR: unable to open ext2_saved, id=%llu: %s\n",
3097                         (unsigned long long)key.objectid, strerror(-ret));
3098                 goto fail;
3099         }
3100
3101         key.objectid = CONV_IMAGE_SUBVOL_OBJECTID;
3102         key.type = BTRFS_ROOT_ITEM_KEY;
3103         key.offset = (u64)-1;
3104         image_root = btrfs_read_fs_root(root->fs_info, &key);
3105         if (!image_root || IS_ERR(image_root)) {
3106                 fprintf(stderr, "unable to open subvol %llu\n",
3107                         (unsigned long long)key.objectid);
3108                 goto fail;
3109         }
3110
3111         name = "image";
3112         root_dir = btrfs_root_dirid(&root->root_item);
3113         dir = btrfs_lookup_dir_item(NULL, image_root, &path,
3114                                    root_dir, name, strlen(name), 0);
3115         if (!dir || IS_ERR(dir)) {
3116                 fprintf(stderr, "unable to find file %s\n", name);
3117                 goto fail;
3118         }
3119         leaf = path.nodes[0];
3120         btrfs_dir_item_key_to_cpu(leaf, dir, &key);
3121         btrfs_release_path(&path);
3122
3123         objectid = key.objectid;
3124
3125         ret = btrfs_lookup_inode(NULL, image_root, &path, &key, 0);
3126         if (ret) {
3127                 fprintf(stderr, "unable to find inode item\n");
3128                 goto fail;
3129         }
3130         leaf = path.nodes[0];
3131         inode = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_inode_item);
3132         total_bytes = btrfs_inode_size(leaf, inode);
3133         btrfs_release_path(&path);
3134
3135         key.objectid = objectid;
3136         key.offset = 0;
3137         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
3138         ret = btrfs_search_slot(NULL, image_root, &key, &path, 0, 0);
3139         if (ret != 0) {
3140                 fprintf(stderr, "unable to find first file extent\n");
3141                 btrfs_release_path(&path);
3142                 goto fail;
3143         }
3144
3145         /* build mapping tree for the relocated blocks */
3146         for (offset = 0; offset < total_bytes; ) {
3147                 leaf = path.nodes[0];
3148                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3149                         ret = btrfs_next_leaf(root, &path);
3150                         if (ret != 0)
3151                                 break;  
3152                         continue;
3153                 }
3154
3155                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3156                 if (key.objectid != objectid || key.offset != offset ||
3157                     btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
3158                         break;
3159
3160                 fi = btrfs_item_ptr(leaf, path.slots[0],
3161                                     struct btrfs_file_extent_item);
3162                 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
3163                         break;
3164                 if (btrfs_file_extent_compression(leaf, fi) ||
3165                     btrfs_file_extent_encryption(leaf, fi) ||
3166                     btrfs_file_extent_other_encoding(leaf, fi))
3167                         break;
3168
3169                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
3170                 /* skip holes and direct mapped extents */
3171                 if (bytenr == 0 || bytenr == offset)
3172                         goto next_extent;
3173
3174                 bytenr += btrfs_file_extent_offset(leaf, fi);
3175                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
3176
3177                 cache1 = btrfs_lookup_block_group(root->fs_info, offset);
3178                 cache2 =  btrfs_lookup_block_group(root->fs_info,
3179                                                    offset + num_bytes - 1);
3180                 if (!cache1 || cache1 != cache2 ||
3181                     (!(cache1->flags & BTRFS_BLOCK_GROUP_SYSTEM) &&
3182                      !intersect_with_sb(offset, num_bytes)))
3183                         break;
3184
3185                 set_extent_bits(&io_tree, offset, offset + num_bytes - 1,
3186                                 EXTENT_LOCKED, GFP_NOFS);
3187                 set_state_private(&io_tree, offset, bytenr);
3188 next_extent:
3189                 offset += btrfs_file_extent_num_bytes(leaf, fi);
3190                 path.slots[0]++;
3191         }
3192         btrfs_release_path(&path);
3193
3194         if (offset < total_bytes) {
3195                 fprintf(stderr, "unable to build extent mapping\n");
3196                 goto fail;
3197         }
3198
3199         first_free = BTRFS_SUPER_INFO_OFFSET + 2 * sectorsize - 1;
3200         first_free &= ~((u64)sectorsize - 1);
3201         /* backup for extent #0 should exist */
3202         if(!test_range_bit(&io_tree, 0, first_free - 1, EXTENT_LOCKED, 1)) {
3203                 fprintf(stderr, "no backup for the first extent\n");
3204                 goto fail;
3205         }
3206         /* force no allocation from system block group */
3207         root->fs_info->system_allocs = -1;
3208         trans = btrfs_start_transaction(root, 1);
3209         BUG_ON(!trans);
3210         /*
3211          * recow the whole chunk tree, this will remove all chunk tree blocks
3212          * from system block group
3213          */
3214         chunk_root = root->fs_info->chunk_root;
3215         memset(&key, 0, sizeof(key));
3216         while (1) {
3217                 ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 1);
3218                 if (ret < 0)
3219                         break;
3220
3221                 ret = btrfs_next_leaf(chunk_root, &path);
3222                 if (ret)
3223                         break;
3224
3225                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
3226                 btrfs_release_path(&path);
3227         }
3228         btrfs_release_path(&path);
3229
3230         offset = 0;
3231         num_bytes = 0;
3232         while(1) {
3233                 cache1 = btrfs_lookup_block_group(root->fs_info, offset);
3234                 if (!cache1)
3235                         break;
3236
3237                 if (cache1->flags & BTRFS_BLOCK_GROUP_SYSTEM)
3238                         num_bytes += btrfs_block_group_used(&cache1->item);
3239
3240                 offset = cache1->key.objectid + cache1->key.offset;
3241         }
3242         /* only extent #0 left in system block group? */
3243         if (num_bytes > first_free) {
3244                 fprintf(stderr, "unable to empty system block group\n");
3245                 goto fail;
3246         }
3247         /* create a system chunk that maps the whole device */
3248         ret = prepare_system_chunk_sb(root->fs_info->super_copy);
3249         if (ret) {
3250                 fprintf(stderr, "unable to update system chunk\n");
3251                 goto fail;
3252         }
3253
3254         ret = btrfs_commit_transaction(trans, root);
3255         BUG_ON(ret);
3256
3257         ret = close_ctree(root);
3258         if (ret) {
3259                 fprintf(stderr, "error during close_ctree %d\n", ret);
3260                 goto fail;
3261         }
3262
3263         /* zero btrfs super block mirrors */
3264         memset(buf, 0, sectorsize);
3265         for (i = 1 ; i < BTRFS_SUPER_MIRROR_MAX; i++) {
3266                 bytenr = btrfs_sb_offset(i);
3267                 if (bytenr >= total_bytes)
3268                         break;
3269                 ret = pwrite(fd, buf, sectorsize, bytenr);
3270                 if (ret != sectorsize) {
3271                         fprintf(stderr,
3272                                 "error during zeroing superblock %d: %d\n",
3273                                 i, ret);
3274                         goto fail;
3275                 }
3276         }
3277
3278         sb_bytenr = (u64)-1;
3279         /* copy all relocated blocks back */
3280         while(1) {
3281                 ret = find_first_extent_bit(&io_tree, 0, &start, &end,
3282                                             EXTENT_LOCKED);
3283                 if (ret)
3284                         break;
3285
3286                 ret = get_state_private(&io_tree, start, &bytenr);
3287                 BUG_ON(ret);
3288
3289                 clear_extent_bits(&io_tree, start, end, EXTENT_LOCKED,
3290                                   GFP_NOFS);
3291
3292                 while (start <= end) {
3293                         if (start == BTRFS_SUPER_INFO_OFFSET) {
3294                                 sb_bytenr = bytenr;
3295                                 goto next_sector;
3296                         }
3297                         ret = pread(fd, buf, sectorsize, bytenr);
3298                         if (ret < 0) {
3299                                 fprintf(stderr, "error during pread %d\n", ret);
3300                                 goto fail;
3301                         }
3302                         BUG_ON(ret != sectorsize);
3303                         ret = pwrite(fd, buf, sectorsize, start);
3304                         if (ret < 0) {
3305                                 fprintf(stderr, "error during pwrite %d\n", ret);
3306                                 goto fail;
3307                         }
3308                         BUG_ON(ret != sectorsize);
3309 next_sector:
3310                         start += sectorsize;
3311                         bytenr += sectorsize;
3312                 }
3313         }
3314
3315         ret = fsync(fd);
3316         if (ret) {
3317                 fprintf(stderr, "error during fsync %d\n", ret);
3318                 goto fail;
3319         }
3320         /*
3321          * finally, overwrite btrfs super block.
3322          */
3323         ret = pread(fd, buf, sectorsize, sb_bytenr);
3324         if (ret < 0) {
3325                 fprintf(stderr, "error during pread %d\n", ret);
3326                 goto fail;
3327         }
3328         BUG_ON(ret != sectorsize);
3329         ret = pwrite(fd, buf, sectorsize, BTRFS_SUPER_INFO_OFFSET);
3330         if (ret < 0) {
3331                 fprintf(stderr, "error during pwrite %d\n", ret);
3332                 goto fail;
3333         }
3334         BUG_ON(ret != sectorsize);
3335         ret = fsync(fd);
3336         if (ret) {
3337                 fprintf(stderr, "error during fsync %d\n", ret);
3338                 goto fail;
3339         }
3340
3341         close(fd);
3342         free(buf);
3343         extent_io_tree_cleanup(&io_tree);
3344         printf("rollback complete.\n");
3345         return 0;
3346
3347 fail:
3348         if (fd != -1)
3349                 close(fd);
3350         free(buf);
3351         fprintf(stderr, "rollback aborted.\n");
3352         return -1;
3353 }
3354
/*
 * Write the btrfs-convert synopsis and the list of supported command line
 * options to standard output, one entry per line.
 */
static void print_usage(void)
{
        static const char * const usage_lines[] = {
                "usage: btrfs-convert [options] device\n",
                "options:\n",
                "\t-d|--no-datasum        disable data checksum, sets NODATASUM\n",
                "\t-i|--no-xattr          ignore xattrs and ACLs\n",
                "\t-n|--no-inline         disable inlining of small files to metadata\n",
                "\t-N|--nodesize SIZE     set filesystem metadata nodesize\n",
                "\t-r|--rollback          roll back to the original filesystem\n",
                "\t-l|--label LABEL       set filesystem label\n",
                "\t-L|--copy-label        use label from converted filesystem\n",
                "\t-p|--progress          show converting progress (default)\n",
                "\t-O|--features LIST     comma separated list of filesystem features\n",
                "\t--no-progress          show only overview, not the detailed progress\n",
        };
        const size_t line_count = sizeof(usage_lines) / sizeof(usage_lines[0]);
        size_t idx;

        /* The lines hold no conversion specifiers, so emit them verbatim. */
        for (idx = 0; idx < line_count; idx++)
                fputs(usage_lines[idx], stdout);
}
3370
3371 int main(int argc, char *argv[])
3372 {
3373         int ret;
3374         int packing = 1;
3375         int noxattr = 0;
3376         int datacsum = 1;
3377         u32 nodesize = max_t(u32, sysconf(_SC_PAGESIZE),
3378                         BTRFS_MKFS_DEFAULT_NODE_SIZE);
3379         int rollback = 0;
3380         int copylabel = 0;
3381         int usage_error = 0;
3382         int progress = 1;
3383         char *file;
3384         char fslabel[BTRFS_LABEL_SIZE];
3385         u64 features = BTRFS_MKFS_DEFAULT_FEATURES;
3386
3387         while(1) {
3388                 enum { GETOPT_VAL_NO_PROGRESS = 256 };
3389                 static const struct option long_options[] = {
3390                         { "no-progress", no_argument, NULL,
3391                                 GETOPT_VAL_NO_PROGRESS },
3392                         { "no-datasum", no_argument, NULL, 'd' },
3393                         { "no-inline", no_argument, NULL, 'n' },
3394                         { "no-xattr", no_argument, NULL, 'i' },
3395                         { "rollback", no_argument, NULL, 'r' },
3396                         { "features", required_argument, NULL, 'O' },
3397                         { "progress", no_argument, NULL, 'p' },
3398                         { "label", required_argument, NULL, 'l' },
3399                         { "copy-label", no_argument, NULL, 'L' },
3400                         { "nodesize", required_argument, NULL, 'N' },
3401                         { "help", no_argument, NULL, GETOPT_VAL_HELP},
3402                         { NULL, 0, NULL, 0 }
3403                 };
3404                 int c = getopt_long(argc, argv, "dinN:rl:LpO:", long_options, NULL);
3405
3406                 if (c < 0)
3407                         break;
3408                 switch(c) {
3409                         case 'd':
3410                                 datacsum = 0;
3411                                 break;
3412                         case 'i':
3413                                 noxattr = 1;
3414                                 break;
3415                         case 'n':
3416                                 packing = 0;
3417                                 break;
3418                         case 'N':
3419                                 nodesize = parse_size(optarg);
3420                                 break;
3421                         case 'r':
3422                                 rollback = 1;
3423                                 break;
3424                         case 'l':
3425                                 copylabel = -1;
3426                                 if (strlen(optarg) >= BTRFS_LABEL_SIZE) {
3427                                         fprintf(stderr,
3428                                 "WARNING: label too long, trimmed to %d bytes\n",
3429                                                 BTRFS_LABEL_SIZE - 1);
3430                                 }
3431                                 __strncpy_null(fslabel, optarg, BTRFS_LABEL_SIZE - 1);
3432                                 break;
3433                         case 'L':
3434                                 copylabel = 1;
3435                                 break;
3436                         case 'p':
3437                                 progress = 1;
3438                                 break;
3439                         case 'O': {
3440                                 char *orig = strdup(optarg);
3441                                 char *tmp = orig;
3442
3443                                 tmp = btrfs_parse_fs_features(tmp, &features);
3444                                 if (tmp) {
3445                                         fprintf(stderr,
3446                                                 "Unrecognized filesystem feature '%s'\n",
3447                                                         tmp);
3448                                         free(orig);
3449                                         exit(1);
3450                                 }
3451                                 free(orig);
3452                                 if (features & BTRFS_FEATURE_LIST_ALL) {
3453                                         btrfs_list_all_fs_features(
3454                                                 ~BTRFS_CONVERT_ALLOWED_FEATURES);
3455                                         exit(0);
3456                                 }
3457                                 if (features & ~BTRFS_CONVERT_ALLOWED_FEATURES) {
3458                                         char buf[64];
3459
3460                                         btrfs_parse_features_to_string(buf,
3461                                                 features & ~BTRFS_CONVERT_ALLOWED_FEATURES);
3462                                         fprintf(stderr,
3463                                                 "ERROR: features not allowed for convert: %s\n",
3464                                                 buf);
3465                                         exit(1);
3466                                 }
3467
3468                                 break;
3469                                 }
3470                         case GETOPT_VAL_NO_PROGRESS:
3471                                 progress = 0;
3472                                 break;
3473                         case GETOPT_VAL_HELP:
3474                         default:
3475                                 print_usage();
3476                                 return c != GETOPT_VAL_HELP;
3477                 }
3478         }
3479         set_argv0(argv);
3480         if (check_argc_exact(argc - optind, 1)) {
3481                 print_usage();
3482                 return 1;
3483         }
3484
3485         if (rollback && (!datacsum || noxattr || !packing)) {
3486                 fprintf(stderr,
3487                         "Usage error: -d, -i, -n options do not apply to rollback\n");
3488                 usage_error++;
3489         }
3490
3491         if (usage_error) {
3492                 print_usage();
3493                 return 1;
3494         }
3495
3496         file = argv[optind];
3497         ret = check_mounted(file);
3498         if (ret < 0) {
3499                 fprintf(stderr, "Could not check mount status: %s\n",
3500                         strerror(-ret));
3501                 return 1;
3502         } else if (ret) {
3503                 fprintf(stderr, "%s is mounted\n", file);
3504                 return 1;
3505         }
3506
3507         if (rollback) {
3508                 ret = do_rollback(file);
3509         } else {
3510                 ret = do_convert(file, datacsum, packing, noxattr, nodesize,
3511                                 copylabel, fslabel, progress, features);
3512         }
3513         if (ret)
3514                 return 1;
3515         return 0;
3516 }