btrfs-progs: convert: Strictly avoid meta or system chunk allocation
[platform/upstream/btrfs-progs.git] / btrfs-convert.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include "kerncompat.h"
20
21 #include <sys/ioctl.h>
22 #include <sys/mount.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <fcntl.h>
28 #include <unistd.h>
29 #include <uuid/uuid.h>
30 #include <linux/limits.h>
31 #include <getopt.h>
32
33 #include "ctree.h"
34 #include "disk-io.h"
35 #include "volumes.h"
36 #include "transaction.h"
37 #include "crc32c.h"
38 #include "utils.h"
39 #include "task-utils.h"
40 #include <ext2fs/ext2_fs.h>
41 #include <ext2fs/ext2fs.h>
42 #include <ext2fs/ext2_ext_attr.h>
43
/* Added to an ext2 inode number to form the btrfs objectid (see dir_iterate_proc()). */
44 #define INO_OFFSET (BTRFS_FIRST_FREE_OBJECTID - EXT2_ROOT_INO)
/* Root key objectid looked up by init_blk_iterate_data() to obtain convert_root. */
45 #define CONV_IMAGE_SUBVOL_OBJECTID BTRFS_FIRST_FREE_OBJECTID
46
47 /*
48  * Compatibility code for e2fsprogs 1.41, which doesn't support the RO compat
49  * flag BIGALLOC. Unlike a normal RO compat flag, BIGALLOC affects how
50  * e2fsprogs checks used space, and btrfs-convert heavily relies on that.
51  * The fallbacks below treat every cluster as exactly one block.
52  */
53 #ifdef HAVE_OLD_E2FSPROGS
54 #define EXT2FS_CLUSTER_RATIO(fs)        (1)
55 #define EXT2_CLUSTERS_PER_GROUP(s)      (EXT2_BLOCKS_PER_GROUP(s))
56 #define EXT2FS_B2C(fs, blk)             (blk)
57 #endif
58
/* Progress state read by print_copied_inodes() and handed to copy_inodes(). */
59 struct task_ctx {
           /* printed as the total in the progress line */
60         uint32_t max_copy_inodes;
           /* printed as the current count in the progress line */
61         uint32_t cur_copy_inodes;
           /* handle passed to task_period_start()/task_period_wait() */
62         struct task_info *info;
63 };
64
65 static void *print_copied_inodes(void *p)
66 {
67         struct task_ctx *priv = p;
68         const char work_indicator[] = { '.', 'o', 'O', 'o' };
69         uint32_t count = 0;
70
71         task_period_start(priv->info, 1000 /* 1s */);
72         while (1) {
73                 count++;
74                 printf("copy inodes [%c] [%10d/%10d]\r",
75                        work_indicator[count % 4], priv->cur_copy_inodes,
76                        priv->max_copy_inodes);
77                 fflush(stdout);
78                 task_period_wait(priv->info);
79         }
80
81         return NULL;
82 }
83
/*
 * Finish the progress output: emit a newline and flush stdout.
 * @p is not used. Always returns 0.
 */
static int after_copied_inodes(void *p)
{
	(void)p;

	printf("\n");
	fflush(stdout);
	return 0;
}
91
/*
 * Callback table for a source filesystem backend. The convert_*() and
 * copy_inodes() inline wrappers in this file dispatch through
 * cctx->convert_ops to these members.
 */
92 struct btrfs_convert_context;
93 struct btrfs_convert_operations {
94         const char *name;
95         int (*open_fs)(struct btrfs_convert_context *cctx, const char *devname);
96         int (*read_used_space)(struct btrfs_convert_context *cctx);
           /* single-block and multi-block allocation; result goes to *block_ret */
97         int (*alloc_block)(struct btrfs_convert_context *cctx, u64 goal,
98                            u64 *block_ret);
99         int (*alloc_block_range)(struct btrfs_convert_context *cctx, u64 goal,
100                            int num, u64 *block_ret);
101         int (*test_block)(struct btrfs_convert_context *cctx, u64 block);
102         void (*free_block)(struct btrfs_convert_context *cctx, u64 block);
103         void (*free_block_range)(struct btrfs_convert_context *cctx, u64 block,
104                            int num);
105         int (*copy_inodes)(struct btrfs_convert_context *cctx,
106                          struct btrfs_root *root, int datacsum,
107                          int packing, int noxattr, struct task_ctx *p);
108         void (*close_fs)(struct btrfs_convert_context *cctx);
109 };
110
111 static void init_convert_context(struct btrfs_convert_context *cctx)
112 {
113         cache_tree_init(&cctx->used);
114         cache_tree_init(&cctx->data_chunks);
115         cache_tree_init(&cctx->free);
116 }
117
118 static void clean_convert_context(struct btrfs_convert_context *cctx)
119 {
120         free_extent_cache_tree(&cctx->used);
121         free_extent_cache_tree(&cctx->data_chunks);
122         free_extent_cache_tree(&cctx->free);
123 }
124
125 static inline int convert_alloc_block(struct btrfs_convert_context *cctx,
126                                       u64 goal, u64 *ret)
127 {
128         return  cctx->convert_ops->alloc_block(cctx, goal, ret);
129 }
130
131 static inline int convert_alloc_block_range(struct btrfs_convert_context *cctx,
132                                       u64 goal, int num, u64 *ret)
133 {
134         return  cctx->convert_ops->alloc_block_range(cctx, goal, num, ret);
135 }
136
137 static inline int convert_test_block(struct btrfs_convert_context *cctx,
138                                      u64 block)
139 {
140         return cctx->convert_ops->test_block(cctx, block);
141 }
142
143 static inline void convert_free_block(struct btrfs_convert_context *cctx,
144                                       u64 block)
145 {
146         cctx->convert_ops->free_block(cctx, block);
147 }
148
149 static inline void convert_free_block_range(struct btrfs_convert_context *cctx,
150                                       u64 block, int num)
151 {
152         cctx->convert_ops->free_block_range(cctx, block, num);
153 }
154
155 static inline int copy_inodes(struct btrfs_convert_context *cctx,
156                               struct btrfs_root *root, int datacsum,
157                               int packing, int noxattr, struct task_ctx *p)
158 {
159         return cctx->convert_ops->copy_inodes(cctx, root, datacsum, packing,
160                                              noxattr, p);
161 }
162
163 static inline void convert_close_fs(struct btrfs_convert_context *cctx)
164 {
165         cctx->convert_ops->close_fs(cctx);
166 }
167
168 /*
169  * Open Ext2fs in readonly mode, read block allocation bitmap and
170  * inode bitmap into memory.
171  */
172 static int ext2_open_fs(struct btrfs_convert_context *cctx, const char *name)
173 {
174         errcode_t ret;
175         ext2_filsys ext2_fs;
176         ext2_ino_t ino;
177         u32 ro_feature;
178
179         ret = ext2fs_open(name, 0, 0, 0, unix_io_manager, &ext2_fs);
180         if (ret) {
181                 fprintf(stderr, "ext2fs_open: %s\n", error_message(ret));
182                 return -1;
183         }
184         /*
185          * We need to know exactly the used space, some RO compat flags like
186          * BIGALLOC will affect how used space is present.
187          * So we need manuall check any unsupported RO compat flags
188          */
189         ro_feature = ext2_fs->super->s_feature_ro_compat;
190         if (ro_feature & ~EXT2_LIB_FEATURE_RO_COMPAT_SUPP) {
191                 error(
192 "unsupported RO features detected: %x, abort convert to avoid possible corruption",
193                       ro_feature & ~EXT2_LIB_FEATURE_COMPAT_SUPP);
194                 goto fail;
195         }
196         ret = ext2fs_read_inode_bitmap(ext2_fs);
197         if (ret) {
198                 fprintf(stderr, "ext2fs_read_inode_bitmap: %s\n",
199                         error_message(ret));
200                 goto fail;
201         }
202         ret = ext2fs_read_block_bitmap(ext2_fs);
203         if (ret) {
204                 fprintf(stderr, "ext2fs_read_block_bitmap: %s\n",
205                         error_message(ret));
206                 goto fail;
207         }
208         /*
209          * search each block group for a free inode. this set up
210          * uninit block/inode bitmaps appropriately.
211          */
212         ino = 1;
213         while (ino <= ext2_fs->super->s_inodes_count) {
214                 ext2_ino_t foo;
215                 ext2fs_new_inode(ext2_fs, ino, 0, NULL, &foo);
216                 ino += EXT2_INODES_PER_GROUP(ext2_fs->super);
217         }
218
219         if (!(ext2_fs->super->s_feature_incompat &
220               EXT2_FEATURE_INCOMPAT_FILETYPE)) {
221                 fprintf(stderr, "filetype feature is missing\n");
222                 goto fail;
223         }
224
225         cctx->fs_data = ext2_fs;
226         cctx->blocksize = ext2_fs->blocksize;
227         cctx->block_count = ext2_fs->super->s_blocks_count;
228         cctx->total_bytes = ext2_fs->blocksize * ext2_fs->super->s_blocks_count;
229         cctx->volume_name = strndup(ext2_fs->super->s_volume_name, 16);
230         cctx->first_data_block = ext2_fs->super->s_first_data_block;
231         cctx->inodes_count = ext2_fs->super->s_inodes_count;
232         cctx->free_inodes_count = ext2_fs->super->s_free_inodes_count;
233         return 0;
234 fail:
235         ext2fs_close(ext2_fs);
236         return -1;
237 }
238
239 static int __ext2_add_one_block(ext2_filsys fs, char *bitmap,
240                                 unsigned long group_nr, struct cache_tree *used)
241 {
242         unsigned long offset;
243         unsigned i;
244         int ret = 0;
245
246         offset = fs->super->s_first_data_block;
247         offset /= EXT2FS_CLUSTER_RATIO(fs);
248         offset += group_nr * EXT2_CLUSTERS_PER_GROUP(fs->super);
249         for (i = 0; i < EXT2_CLUSTERS_PER_GROUP(fs->super); i++) {
250                 if (ext2fs_test_bit(i, bitmap)) {
251                         u64 start;
252
253                         start = (i + offset) * EXT2FS_CLUSTER_RATIO(fs);
254                         start *= fs->blocksize;
255                         ret = add_merge_cache_extent(used, start,
256                                                      fs->blocksize);
257                         if (ret < 0)
258                                 break;
259                 }
260         }
261         return ret;
262 }
263
264 /*
265  * Read all used ext2 space into cctx->used cache tree
266  */
267 static int ext2_read_used_space(struct btrfs_convert_context *cctx)
268 {
269         ext2_filsys fs = (ext2_filsys)cctx->fs_data;
270         blk64_t blk_itr = EXT2FS_B2C(fs, fs->super->s_first_data_block);
271         struct cache_tree *used_tree = &cctx->used;
272         char *block_bitmap = NULL;
273         unsigned long i;
274         int block_nbytes;
275         int ret = 0;
276
277         block_nbytes = EXT2_CLUSTERS_PER_GROUP(fs->super) / 8;
278         /* Shouldn't happen */
279         BUG_ON(!fs->block_map);
280
281         block_bitmap = malloc(block_nbytes);
282         if (!block_bitmap)
283                 return -ENOMEM;
284
285         for (i = 0; i < fs->group_desc_count; i++) {
286                 ret = ext2fs_get_block_bitmap_range(fs->block_map, blk_itr,
287                                                 block_nbytes * 8, block_bitmap);
288                 if (ret) {
289                         error("fail to get bitmap from ext2, %s",
290                               strerror(-ret));
291                         break;
292                 }
293                 ret = __ext2_add_one_block(fs, block_bitmap, i, used_tree);
294                 if (ret < 0) {
295                         error("fail to build used space tree, %s",
296                               strerror(-ret));
297                         break;
298                 }
299                 blk_itr += EXT2_CLUSTERS_PER_GROUP(fs->super);
300         }
301
302         free(block_bitmap);
303         return ret;
304 }
305
306 static void ext2_close_fs(struct btrfs_convert_context *cctx)
307 {
308         if (cctx->volume_name) {
309                 free(cctx->volume_name);
310                 cctx->volume_name = NULL;
311         }
312         ext2fs_close(cctx->fs_data);
313 }
314
315 static int ext2_alloc_block(struct btrfs_convert_context *cctx,
316                             u64 goal, u64 *block_ret)
317 {
318         ext2_filsys fs = cctx->fs_data;
319         blk_t block;
320
321         if (!ext2fs_new_block(fs, goal, NULL, &block)) {
322                 ext2fs_fast_mark_block_bitmap(fs->block_map, block);
323                 *block_ret = block;
324                 return 0;
325         }
326         return -ENOSPC;
327 }
328
329 static int ext2_alloc_block_range(struct btrfs_convert_context *cctx, u64 goal,
330                 int num, u64 *block_ret)
331 {
332         ext2_filsys fs = cctx->fs_data;
333         blk_t block;
334         ext2fs_block_bitmap bitmap = fs->block_map;
335         blk_t start = ext2fs_get_block_bitmap_start(bitmap);
336         blk_t end = ext2fs_get_block_bitmap_end(bitmap);
337
338         for (block = max_t(u64, goal, start); block + num < end; block++) {
339                 if (ext2fs_fast_test_block_bitmap_range(bitmap, block, num)) {
340                         ext2fs_fast_mark_block_bitmap_range(bitmap, block,
341                                         num);
342                         *block_ret = block;
343                         return 0;
344                 }
345         }
346         return -ENOSPC;
347 }
348
349 static void ext2_free_block(struct btrfs_convert_context *cctx, u64 block)
350 {
351         ext2_filsys fs = cctx->fs_data;
352
353         BUG_ON(block != (blk_t)block);
354         ext2fs_fast_unmark_block_bitmap(fs->block_map, block);
355 }
356
357 static void ext2_free_block_range(struct btrfs_convert_context *cctx, u64 block, int num)
358 {
359         ext2_filsys fs = cctx->fs_data;
360
361         BUG_ON(block != (blk_t)block);
362         ext2fs_fast_unmark_block_bitmap_range(fs->block_map, block, num);
363 }
364
365 static int cache_free_extents(struct btrfs_root *root,
366                               struct btrfs_convert_context *cctx)
367
368 {
369         int i, ret = 0;
370         blk_t block;
371         u64 bytenr;
372         u64 blocksize = cctx->blocksize;
373
374         block = cctx->first_data_block;
375         for (; block < cctx->block_count; block++) {
376                 if (convert_test_block(cctx, block))
377                         continue;
378                 bytenr = block * blocksize;
379                 ret = set_extent_dirty(&root->fs_info->free_space_cache,
380                                        bytenr, bytenr + blocksize - 1, 0);
381                 BUG_ON(ret);
382         }
383
384         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
385                 bytenr = btrfs_sb_offset(i);
386                 bytenr &= ~((u64)BTRFS_STRIPE_LEN - 1);
387                 if (bytenr >= blocksize * cctx->block_count)
388                         break;
389                 clear_extent_dirty(&root->fs_info->free_space_cache, bytenr,
390                                    bytenr + BTRFS_STRIPE_LEN - 1, 0);
391         }
392
393         clear_extent_dirty(&root->fs_info->free_space_cache,
394                            0, BTRFS_SUPER_INFO_OFFSET - 1, 0);
395
396         return 0;
397 }
398
/*
 * Extent allocator referenced by extent_ops: find @num_bytes of space among
 * the EXTENT_DIRTY ranges of fs_info->free_space_cache, searching from
 * @hint_byte and restarting once from offset 0 when the search runs dry.
 *
 * A candidate is skipped when it is shorter than @num_bytes, when
 * test_range_bit() reports EXTENT_DIRTY for it in pinned_extents, when its
 * block group has BTRFS_BLOCK_GROUP_SYSTEM set or the candidate runs past the
 * end of its block group, or (only if @metadata) when
 * check_crossing_stripes() rejects it.
 *
 * On success the chosen range is cleared from free_space_cache, @ins is set
 * to (start, BTRFS_EXTENT_ITEM_KEY, num_bytes) and 0 is returned.
 * Otherwise "not enough free space" is printed and -ENOSPC is returned.
 */
399 static int custom_alloc_extent(struct btrfs_root *root, u64 num_bytes,
400                                u64 hint_byte, struct btrfs_key *ins,
401                                int metadata)
402 {
403         u64 start;
404         u64 end;
405         u64 last = hint_byte;
406         int ret;
407         int wrapped = 0;
408         struct btrfs_block_group_cache *cache;
409
410         while(1) {
411                 ret = find_first_extent_bit(&root->fs_info->free_space_cache,
412                                             last, &start, &end, EXTENT_DIRTY);
413                 if (ret) {
                            /* nothing at or after 'last': retry once from 0, then give up */
414                         if (wrapped++ == 0) {
415                                 last = 0;
416                                 continue;
417                         } else {
418                                 goto fail;
419                         }
420                 }
421
                    /* never start before the current search position */
422                 start = max(last, start);
423                 last = end + 1;
424                 if (last - start < num_bytes)
425                         continue;
426
                    /* from here on 'last' is the end of the candidate range */
427                 last = start + num_bytes;
428                 if (test_range_bit(&root->fs_info->pinned_extents,
429                                    start, last - 1, EXTENT_DIRTY, 0))
430                         continue;
431
432                 cache = btrfs_lookup_block_group(root->fs_info, start);
433                 BUG_ON(!cache);
434                 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM ||
435                     last > cache->key.objectid + cache->key.offset) {
                            /* resume the search at the end of this block group */
436                         last = cache->key.objectid + cache->key.offset;
437                         continue;
438                 }
439
440                 if (metadata) {
441                         BUG_ON(num_bytes != root->nodesize);
442                         if (check_crossing_stripes(start, num_bytes)) {
                                    /* resume at the BTRFS_STRIPE_LEN boundary at or below the candidate end */
443                                 last = round_down(start + num_bytes,
444                                                   BTRFS_STRIPE_LEN);
445                                 continue;
446                         }
447                 }
448                 clear_extent_dirty(&root->fs_info->free_space_cache,
449                                    start, start + num_bytes - 1, 0);
450
451                 ins->objectid = start;
452                 ins->offset = num_bytes;
453                 ins->type = BTRFS_EXTENT_ITEM_KEY;
454                 return 0;
455         }
456 fail:
457         fprintf(stderr, "not enough free space\n");
458         return -ENOSPC;
459 }
460
461 static int intersect_with_sb(u64 bytenr, u64 num_bytes)
462 {
463         int i;
464         u64 offset;
465
466         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
467                 offset = btrfs_sb_offset(i);
468                 offset &= ~((u64)BTRFS_STRIPE_LEN - 1);
469
470                 if (bytenr < offset + BTRFS_STRIPE_LEN &&
471                     bytenr + num_bytes > offset)
472                         return 1;
473         }
474         return 0;
475 }
476
477 static int custom_free_extent(struct btrfs_root *root, u64 bytenr,
478                               u64 num_bytes)
479 {
480         return intersect_with_sb(bytenr, num_bytes);
481 }
482
/* Extent hooks for the conversion: custom_alloc_extent() / custom_free_extent(). */
483 static struct btrfs_extent_ops extent_ops = {
484         .alloc_extent = custom_alloc_extent,
485         .free_extent = custom_free_extent,
486 };
487
488 static int convert_insert_dirent(struct btrfs_trans_handle *trans,
489                                  struct btrfs_root *root,
490                                  const char *name, size_t name_len,
491                                  u64 dir, u64 objectid,
492                                  u8 file_type, u64 index_cnt,
493                                  struct btrfs_inode_item *inode)
494 {
495         int ret;
496         u64 inode_size;
497         struct btrfs_key location = {
498                 .objectid = objectid,
499                 .offset = 0,
500                 .type = BTRFS_INODE_ITEM_KEY,
501         };
502
503         ret = btrfs_insert_dir_item(trans, root, name, name_len,
504                                     dir, &location, file_type, index_cnt);
505         if (ret)
506                 return ret;
507         ret = btrfs_insert_inode_ref(trans, root, name, name_len,
508                                      objectid, dir, index_cnt);
509         if (ret)
510                 return ret;
511         inode_size = btrfs_stack_inode_size(inode) + name_len * 2;
512         btrfs_set_stack_inode_size(inode, inode_size);
513
514         return 0;
515 }
516
/* Private data handed to dir_iterate_proc() through ext2fs_dir_iterate2(). */
517 struct dir_iterate_data {
518         struct btrfs_trans_handle *trans;
519         struct btrfs_root *root;
            /* inode item whose size convert_insert_dirent() updates */
520         struct btrfs_inode_item *inode;
            /* btrfs objectid of the directory being iterated */
521         u64 objectid;
            /* index for the next entry; create_dir_entries() starts it at 2 */
522         u64 index_cnt;
            /* objectid taken from the ".." entry, 0 until seen */
523         u64 parent;
            /* negative result of convert_insert_dirent(), 0 if none failed */
524         int errcode;
525 };
526
/*
 * Maps an ext2 directory entry file type (EXT2_FT_*) to the btrfs one
 * (BTRFS_FT_*). Indexed by dir_iterate_proc().
 */
527 static u8 filetype_conversion_table[EXT2_FT_MAX] = {
528         [EXT2_FT_UNKNOWN]       = BTRFS_FT_UNKNOWN,
529         [EXT2_FT_REG_FILE]      = BTRFS_FT_REG_FILE,
530         [EXT2_FT_DIR]           = BTRFS_FT_DIR,
531         [EXT2_FT_CHRDEV]        = BTRFS_FT_CHRDEV,
532         [EXT2_FT_BLKDEV]        = BTRFS_FT_BLKDEV,
533         [EXT2_FT_FIFO]          = BTRFS_FT_FIFO,
534         [EXT2_FT_SOCK]          = BTRFS_FT_SOCK,
535         [EXT2_FT_SYMLINK]       = BTRFS_FT_SYMLINK,
536 };
537
538 static int dir_iterate_proc(ext2_ino_t dir, int entry,
539                             struct ext2_dir_entry *dirent,
540                             int offset, int blocksize,
541                             char *buf,void *priv_data)
542 {
543         int ret;
544         int file_type;
545         u64 objectid;
546         char dotdot[] = "..";
547         struct dir_iterate_data *idata = (struct dir_iterate_data *)priv_data;
548         int name_len;
549
550         name_len = dirent->name_len & 0xFF;
551
552         objectid = dirent->inode + INO_OFFSET;
553         if (!strncmp(dirent->name, dotdot, name_len)) {
554                 if (name_len == 2) {
555                         BUG_ON(idata->parent != 0);
556                         idata->parent = objectid;
557                 }
558                 return 0;
559         }
560         if (dirent->inode < EXT2_GOOD_OLD_FIRST_INO)
561                 return 0;
562
563         file_type = dirent->name_len >> 8;
564         BUG_ON(file_type > EXT2_FT_SYMLINK);
565
566         ret = convert_insert_dirent(idata->trans, idata->root, dirent->name,
567                                     name_len, idata->objectid, objectid,
568                                     filetype_conversion_table[file_type],
569                                     idata->index_cnt, idata->inode);
570         if (ret < 0) {
571                 idata->errcode = ret;
572                 return BLOCK_ABORT;
573         }
574
575         idata->index_cnt++;
576         return 0;
577 }
578
579 static int create_dir_entries(struct btrfs_trans_handle *trans,
580                               struct btrfs_root *root, u64 objectid,
581                               struct btrfs_inode_item *btrfs_inode,
582                               ext2_filsys ext2_fs, ext2_ino_t ext2_ino)
583 {
584         int ret;
585         errcode_t err;
586         struct dir_iterate_data data = {
587                 .trans          = trans,
588                 .root           = root,
589                 .inode          = btrfs_inode,
590                 .objectid       = objectid,
591                 .index_cnt      = 2,
592                 .parent         = 0,
593                 .errcode        = 0,
594         };
595
596         err = ext2fs_dir_iterate2(ext2_fs, ext2_ino, 0, NULL,
597                                   dir_iterate_proc, &data);
598         if (err)
599                 goto error;
600         ret = data.errcode;
601         if (ret == 0 && data.parent == objectid) {
602                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
603                                              objectid, objectid, 0);
604         }
605         return ret;
606 error:
607         fprintf(stderr, "ext2fs_dir_iterate2: %s\n", error_message(err));
608         return -1;
609 }
610
611 static int read_disk_extent(struct btrfs_root *root, u64 bytenr,
612                             u32 num_bytes, char *buffer)
613 {
614         int ret;
615         struct btrfs_fs_devices *fs_devs = root->fs_info->fs_devices;
616
617         ret = pread(fs_devs->latest_bdev, buffer, num_bytes, bytenr);
618         if (ret != num_bytes)
619                 goto fail;
620         ret = 0;
621 fail:
622         if (ret > 0)
623                 ret = -1;
624         return ret;
625 }
626
627 static int csum_disk_extent(struct btrfs_trans_handle *trans,
628                             struct btrfs_root *root,
629                             u64 disk_bytenr, u64 num_bytes)
630 {
631         u32 blocksize = root->sectorsize;
632         u64 offset;
633         char *buffer;
634         int ret = 0;
635
636         buffer = malloc(blocksize);
637         if (!buffer)
638                 return -ENOMEM;
639         for (offset = 0; offset < num_bytes; offset += blocksize) {
640                 ret = read_disk_extent(root, disk_bytenr + offset,
641                                         blocksize, buffer);
642                 if (ret)
643                         break;
644                 ret = btrfs_csum_file_block(trans,
645                                             root->fs_info->csum_root,
646                                             disk_bytenr + num_bytes,
647                                             disk_bytenr + offset,
648                                             buffer, blocksize);
649                 if (ret)
650                         break;
651         }
652         free(buffer);
653         return ret;
654 }
655
/*
 * State carried across block_iterate_proc() calls while the data blocks of
 * one file are converted. Set up by init_blk_iterate_data().
 */
656 struct blk_iterate_data {
657         struct btrfs_trans_handle *trans;
658         struct btrfs_root *root;
            /* root read with objectid CONV_IMAGE_SUBVOL_OBJECTID */
659         struct btrfs_root *convert_root;
660         struct btrfs_inode_item *inode;
            /* inode searched in convert_root; BTRFS_FIRST_FREE_OBJECTID + 1 */
661         u64 convert_ino;
662         u64 objectid;
            /* pending run: first file block, first disk block, length in blocks */
663         u64 first_block;
664         u64 disk_block;
665         u64 num_blocks;
            /* disk block number at which a pending run must be flushed */
666         u64 boundary;
667         int checksum;
            /* error from block_iterate_proc(), set by __block_iterate_proc() */
668         int errcode;
669 };
670
671 static void init_blk_iterate_data(struct blk_iterate_data *data,
672                                   struct btrfs_trans_handle *trans,
673                                   struct btrfs_root *root,
674                                   struct btrfs_inode_item *inode,
675                                   u64 objectid, int checksum)
676 {
677         struct btrfs_key key;
678
679         data->trans             = trans;
680         data->root              = root;
681         data->inode             = inode;
682         data->objectid          = objectid;
683         data->first_block       = 0;
684         data->disk_block        = 0;
685         data->num_blocks        = 0;
686         data->boundary          = (u64)-1;
687         data->checksum          = checksum;
688         data->errcode           = 0;
689
690         key.objectid = CONV_IMAGE_SUBVOL_OBJECTID;
691         key.type = BTRFS_ROOT_ITEM_KEY;
692         key.offset = (u64)-1;
693         data->convert_root = btrfs_read_fs_root(root->fs_info, &key);
694         /* Impossible as we just opened it before */
695         BUG_ON(!data->convert_root || IS_ERR(data->convert_root));
696         data->convert_ino = BTRFS_FIRST_FREE_OBJECTID + 1;
697 }
698
699 /*
700  * Record a file extent of the original filesystem into the btrfs one.
701  * The special point is that the old disk_block can point to a reserved range.
702  * So here, we don't use disk_block directly but search convert_root
703  * to get the real disk_bytenr.
     *
     * @file_block, @disk_block and @num_blocks are in units of
     * root->sectorsize. A @disk_block of 0 is recorded as a hole.
     *
     * Returns 0 on success, -ENOMEM if no path can be allocated, -ENOENT if
     * convert_root has no extent item at or before the wanted offset, or a
     * negative error from the tree search / btrfs_record_file_extent().
704  */
705 static int record_file_blocks(struct blk_iterate_data *data,
706                               u64 file_block, u64 disk_block, u64 num_blocks)
707 {
708         int ret = 0;
709         struct btrfs_root *root = data->root;
710         struct btrfs_root *convert_root = data->convert_root;
711         struct btrfs_path *path;
712         u64 file_pos = file_block * root->sectorsize;
713         u64 old_disk_bytenr = disk_block * root->sectorsize;
714         u64 num_bytes = num_blocks * root->sectorsize;
715         u64 cur_off = old_disk_bytenr;
716
717         /* Hole, pass it to record_file_extent directly */
718         if (old_disk_bytenr == 0)
719                 return btrfs_record_file_extent(data->trans, root,
720                                 data->objectid, data->inode, file_pos, 0,
721                                 num_bytes);
722
723         path = btrfs_alloc_path();
724         if (!path)
725                 return -ENOMEM;
726
727         /*
728          * Search real disk bytenr from convert root
729          */
730         while (cur_off < old_disk_bytenr + num_bytes) {
731                 struct btrfs_key key;
732                 struct btrfs_file_extent_item *fi;
733                 struct extent_buffer *node;
734                 int slot;
735                 u64 extent_disk_bytenr;
736                 u64 extent_num_bytes;
737                 u64 real_disk_bytenr;
738                 u64 cur_len;
739
                    /* the key offset searched for is the old disk byte offset */
740                 key.objectid = data->convert_ino;
741                 key.type = BTRFS_EXTENT_DATA_KEY;
742                 key.offset = cur_off;
743
744                 ret = btrfs_search_slot(NULL, convert_root, &key, path, 0, 0);
745                 if (ret < 0)
746                         break;
747                 if (ret > 0) {
                            /* no exact key: step back to the previous EXTENT_DATA item of convert_ino */
748                         ret = btrfs_previous_item(convert_root, path,
749                                                   data->convert_ino,
750                                                   BTRFS_EXTENT_DATA_KEY);
751                         if (ret < 0)
752                                 break;
753                         if (ret > 0) {
754                                 ret = -ENOENT;
755                                 break;
756                         }
757                 }
758                 node = path->nodes[0];
759                 slot = path->slots[0];
760                 btrfs_item_key_to_cpu(node, &key, slot);
761                 BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY ||
762                        key.objectid != data->convert_ino ||
763                        key.offset > cur_off);
764                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
765                 extent_disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
766                 extent_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
                    /* the extent found must actually cover cur_off */
767                 BUG_ON(cur_off - key.offset >= extent_num_bytes);
768                 btrfs_release_path(path);
769
                    /* same distance into the extent, applied to its disk bytenr */
770                 real_disk_bytenr = cur_off - key.offset + extent_disk_bytenr;
                    /* stop at whichever ends first: this extent or the requested range */
771                 cur_len = min(key.offset + extent_num_bytes,
772                               old_disk_bytenr + num_bytes) - cur_off;
773                 ret = btrfs_record_file_extent(data->trans, data->root,
774                                         data->objectid, data->inode, file_pos,
775                                         real_disk_bytenr, cur_len);
776                 if (ret < 0)
777                         break;
778                 cur_off += cur_len;
779                 file_pos += cur_len;
780
781                 /*
782                  * No need to care about csum
783                  * As every byte of old fs image is calculated for csum, no
784                  * need to waste CPU cycles now.
785                  */
786         }
787         btrfs_free_path(path);
788         return ret;
789 }
790
/*
 * Feed one (disk_block, file_block) mapping of a file into @idata.
 *
 * Consecutive mappings are accumulated as one pending run (first_block,
 * disk_block, num_blocks). The run is flushed through record_file_blocks()
 * when the new block does not continue it in file or disk order, or when a
 * barrier is hit: the block intersects a superblock stripe or has reached
 * idata->boundary. A gap between the flushed position and @file_block is
 * recorded with disk block 0, which record_file_blocks() treats as a hole.
 *
 * Returns 0 on success or the non-zero result of record_file_blocks().
 */
791 static int block_iterate_proc(u64 disk_block, u64 file_block,
792                               struct blk_iterate_data *idata)
793 {
794         int ret = 0;
795         int sb_region;
796         int do_barrier;
797         struct btrfs_root *root = idata->root;
798         struct btrfs_block_group_cache *cache;
799         u64 bytenr = disk_block * root->sectorsize;
800
801         sb_region = intersect_with_sb(bytenr, root->sectorsize);
802         do_barrier = sb_region || disk_block >= idata->boundary;
803         if ((idata->num_blocks > 0 && do_barrier) ||
804             (file_block > idata->first_block + idata->num_blocks) ||
805             (disk_block != idata->disk_block + idata->num_blocks)) {
                    /* flush the pending run, if any */
806                 if (idata->num_blocks > 0) {
807                         ret = record_file_blocks(idata, idata->first_block,
808                                                  idata->disk_block,
809                                                  idata->num_blocks);
810                         if (ret)
811                                 goto fail;
812                         idata->first_block += idata->num_blocks;
813                         idata->num_blocks = 0;
814                 }
                    /* record the gap up to file_block with disk block 0 */
815                 if (file_block > idata->first_block) {
816                         ret = record_file_blocks(idata, idata->first_block,
817                                         0, file_block - idata->first_block);
818                         if (ret)
819                                 goto fail;
820                 }
821
822                 if (sb_region) {
                            /* round bytenr up to the next BTRFS_STRIPE_LEN multiple */
823                         bytenr += BTRFS_STRIPE_LEN - 1;
824                         bytenr &= ~((u64)BTRFS_STRIPE_LEN - 1);
825                 } else {
                            /* otherwise the boundary is the end of the containing block group */
826                         cache = btrfs_lookup_block_group(root->fs_info, bytenr);
827                         BUG_ON(!cache);
828                         bytenr = cache->key.objectid + cache->key.offset;
829                 }
830
                    /* start a new run at this block */
831                 idata->first_block = file_block;
832                 idata->disk_block = disk_block;
833                 idata->boundary = bytenr / root->sectorsize;
834         }
835         idata->num_blocks++;
836 fail:
837         return ret;
838 }
839
840 static int __block_iterate_proc(ext2_filsys fs, blk_t *blocknr,
841                                 e2_blkcnt_t blockcnt, blk_t ref_block,
842                                 int ref_offset, void *priv_data)
843 {
844         int ret;
845         struct blk_iterate_data *idata;
846         idata = (struct blk_iterate_data *)priv_data;
847         ret = block_iterate_proc(*blocknr, blockcnt, idata);
848         if (ret) {
849                 idata->errcode = ret;
850                 return BLOCK_ABORT;
851         }
852         return 0;
853 }
854
855 /*
856  * traverse file's data blocks, record these data blocks as file extents.
857  */
858 static int create_file_extents(struct btrfs_trans_handle *trans,
859                                struct btrfs_root *root, u64 objectid,
860                                struct btrfs_inode_item *btrfs_inode,
861                                ext2_filsys ext2_fs, ext2_ino_t ext2_ino,
862                                int datacsum, int packing)
863 {
864         int ret;
865         char *buffer = NULL;
866         errcode_t err;
867         u32 last_block;
868         u32 sectorsize = root->sectorsize;
869         u64 inode_size = btrfs_stack_inode_size(btrfs_inode);
870         struct blk_iterate_data data;
871
872         init_blk_iterate_data(&data, trans, root, btrfs_inode, objectid,
873                               datacsum);
874
875         err = ext2fs_block_iterate2(ext2_fs, ext2_ino, BLOCK_FLAG_DATA_ONLY,
876                                     NULL, __block_iterate_proc, &data);
877         if (err)
878                 goto error;
879         ret = data.errcode;
880         if (ret)
881                 goto fail;
882         if (packing && data.first_block == 0 && data.num_blocks > 0 &&
883             inode_size <= BTRFS_MAX_INLINE_DATA_SIZE(root)) {
884                 u64 num_bytes = data.num_blocks * sectorsize;
885                 u64 disk_bytenr = data.disk_block * sectorsize;
886                 u64 nbytes;
887
888                 buffer = malloc(num_bytes);
889                 if (!buffer)
890                         return -ENOMEM;
891                 ret = read_disk_extent(root, disk_bytenr, num_bytes, buffer);
892                 if (ret)
893                         goto fail;
894                 if (num_bytes > inode_size)
895                         num_bytes = inode_size;
896                 ret = btrfs_insert_inline_extent(trans, root, objectid,
897                                                  0, buffer, num_bytes);
898                 if (ret)
899                         goto fail;
900                 nbytes = btrfs_stack_inode_nbytes(btrfs_inode) + num_bytes;
901                 btrfs_set_stack_inode_nbytes(btrfs_inode, nbytes);
902         } else if (data.num_blocks > 0) {
903                 ret = record_file_blocks(&data, data.first_block,
904                                          data.disk_block, data.num_blocks);
905                 if (ret)
906                         goto fail;
907         }
908         data.first_block += data.num_blocks;
909         last_block = (inode_size + sectorsize - 1) / sectorsize;
910         if (last_block > data.first_block) {
911                 ret = record_file_blocks(&data, data.first_block, 0,
912                                          last_block - data.first_block);
913         }
914 fail:
915         free(buffer);
916         return ret;
917 error:
918         fprintf(stderr, "ext2fs_block_iterate2: %s\n", error_message(err));
919         return -1;
920 }
921
922 static int create_symbol_link(struct btrfs_trans_handle *trans,
923                               struct btrfs_root *root, u64 objectid,
924                               struct btrfs_inode_item *btrfs_inode,
925                               ext2_filsys ext2_fs, ext2_ino_t ext2_ino,
926                               struct ext2_inode *ext2_inode)
927 {
928         int ret;
929         char *pathname;
930         u64 inode_size = btrfs_stack_inode_size(btrfs_inode);
931         if (ext2fs_inode_data_blocks(ext2_fs, ext2_inode)) {
932                 btrfs_set_stack_inode_size(btrfs_inode, inode_size + 1);
933                 ret = create_file_extents(trans, root, objectid, btrfs_inode,
934                                           ext2_fs, ext2_ino, 1, 1);
935                 btrfs_set_stack_inode_size(btrfs_inode, inode_size);
936                 return ret;
937         }
938
939         pathname = (char *)&(ext2_inode->i_block[0]);
940         BUG_ON(pathname[inode_size] != 0);
941         ret = btrfs_insert_inline_extent(trans, root, objectid, 0,
942                                          pathname, inode_size + 1);
943         btrfs_set_stack_inode_nbytes(btrfs_inode, inode_size + 1);
944         return ret;
945 }
946
/*
 * The xattr/acl code that follows is derived from fs/ext3/xattr.c and
 * fs/ext3/acl.c.
 *
 * EXT2_XATTR_BHDR:   view a buffer as an xattr header.
 * EXT2_XATTR_BFIRST: first entry, located right after that header.
 * EXT2_XATTR_IHDR:   header stored inside an inode, found
 *                    EXT2_GOOD_OLD_INODE_SIZE + i_extra_isize bytes in.
 * EXT2_XATTR_IFIRST: first in-inode entry, located right after the
 *                    header's h_magic field.
 */
#define EXT2_XATTR_BHDR(ptr) ((struct ext2_ext_attr_header *)(ptr))
#define EXT2_XATTR_BFIRST(ptr) \
	((struct ext2_ext_attr_entry *)(&EXT2_XATTR_BHDR(ptr)[1]))
#define EXT2_XATTR_IHDR(inode) \
	((struct ext2_ext_attr_header *) ((char *)(inode) + \
		EXT2_GOOD_OLD_INODE_SIZE + (inode)->i_extra_isize))
#define EXT2_XATTR_IFIRST(inode) \
	((struct ext2_ext_attr_entry *) ((char *)EXT2_XATTR_IHDR(inode) + \
		sizeof(EXT2_XATTR_IHDR(inode)->h_magic)))
960
/*
 * Walk the xattr entry list starting at @entry until the terminating entry.
 * Returns -EIO as soon as a successor entry would start at or beyond @end,
 * 0 when the terminator is reached first.
 */
static int ext2_xattr_check_names(struct ext2_ext_attr_entry *entry,
				  const void *end)
{
	struct ext2_ext_attr_entry *cur = entry;
	struct ext2_ext_attr_entry *following;

	for (; !EXT2_EXT_IS_LAST_ENTRY(cur); cur = following) {
		following = EXT2_EXT_ATTR_NEXT(cur);
		if ((void *)following >= end)
			return -EIO;
	}
	return 0;
}
974
975 static int ext2_xattr_check_block(const char *buf, size_t size)
976 {
977         int error;
978         struct ext2_ext_attr_header *header = EXT2_XATTR_BHDR(buf);
979
980         if (header->h_magic != EXT2_EXT_ATTR_MAGIC ||
981             header->h_blocks != 1)
982                 return -EIO;
983         error = ext2_xattr_check_names(EXT2_XATTR_BFIRST(buf), buf + size);
984         return error;
985 }
986
987 static int ext2_xattr_check_entry(struct ext2_ext_attr_entry *entry,
988                                   size_t size)
989 {
990         size_t value_size = entry->e_value_size;
991
992         if (entry->e_value_block != 0 || value_size > size ||
993             entry->e_value_offs + value_size > size)
994                 return -EIO;
995         return 0;
996 }
997
998 #define EXT2_ACL_VERSION        0x0001
999
1000 /* 23.2.5 acl_tag_t values */
1001
1002 #define ACL_UNDEFINED_TAG       (0x00)
1003 #define ACL_USER_OBJ            (0x01)
1004 #define ACL_USER                (0x02)
1005 #define ACL_GROUP_OBJ           (0x04)
1006 #define ACL_GROUP               (0x08)
1007 #define ACL_MASK                (0x10)
1008 #define ACL_OTHER               (0x20)
1009
1010 /* 23.2.7 ACL qualifier constants */
1011
1012 #define ACL_UNDEFINED_ID        ((id_t)-1)
1013
1014 typedef struct {
1015         __le16          e_tag;
1016         __le16          e_perm;
1017         __le32          e_id;
1018 } ext2_acl_entry;
1019
1020 typedef struct {
1021         __le16          e_tag;
1022         __le16          e_perm;
1023 } ext2_acl_entry_short;
1024
1025 typedef struct {
1026         __le32          a_version;
1027 } ext2_acl_header;
1028
1029 static inline int ext2_acl_count(size_t size)
1030 {
1031         ssize_t s;
1032         size -= sizeof(ext2_acl_header);
1033         s = size - 4 * sizeof(ext2_acl_entry_short);
1034         if (s < 0) {
1035                 if (size % sizeof(ext2_acl_entry_short))
1036                         return -1;
1037                 return size / sizeof(ext2_acl_entry_short);
1038         } else {
1039                 if (s % sizeof(ext2_acl_entry))
1040                         return -1;
1041                 return s / sizeof(ext2_acl_entry) + 4;
1042         }
1043 }
1044
1045 #define ACL_EA_VERSION          0x0002
1046
1047 typedef struct {
1048         __le16          e_tag;
1049         __le16          e_perm;
1050         __le32          e_id;
1051 } acl_ea_entry;
1052
1053 typedef struct {
1054         __le32          a_version;
1055         acl_ea_entry    a_entries[0];
1056 } acl_ea_header;
1057
1058 static inline size_t acl_ea_size(int count)
1059 {
1060         return sizeof(acl_ea_header) + count * sizeof(acl_ea_entry);
1061 }
1062
1063 static int ext2_acl_to_xattr(void *dst, const void *src,
1064                              size_t dst_size, size_t src_size)
1065 {
1066         int i, count;
1067         const void *end = src + src_size;
1068         acl_ea_header *ext_acl = (acl_ea_header *)dst;
1069         acl_ea_entry *dst_entry = ext_acl->a_entries;
1070         ext2_acl_entry *src_entry;
1071
1072         if (src_size < sizeof(ext2_acl_header))
1073                 goto fail;
1074         if (((ext2_acl_header *)src)->a_version !=
1075             cpu_to_le32(EXT2_ACL_VERSION))
1076                 goto fail;
1077         src += sizeof(ext2_acl_header);
1078         count = ext2_acl_count(src_size);
1079         if (count <= 0)
1080                 goto fail;
1081
1082         BUG_ON(dst_size < acl_ea_size(count));
1083         ext_acl->a_version = cpu_to_le32(ACL_EA_VERSION);
1084         for (i = 0; i < count; i++, dst_entry++) {
1085                 src_entry = (ext2_acl_entry *)src;
1086                 if (src + sizeof(ext2_acl_entry_short) > end)
1087                         goto fail;
1088                 dst_entry->e_tag = src_entry->e_tag;
1089                 dst_entry->e_perm = src_entry->e_perm;
1090                 switch (le16_to_cpu(src_entry->e_tag)) {
1091                 case ACL_USER_OBJ:
1092                 case ACL_GROUP_OBJ:
1093                 case ACL_MASK:
1094                 case ACL_OTHER:
1095                         src += sizeof(ext2_acl_entry_short);
1096                         dst_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID);
1097                         break;
1098                 case ACL_USER:
1099                 case ACL_GROUP:
1100                         src += sizeof(ext2_acl_entry);
1101                         if (src > end)
1102                                 goto fail;
1103                         dst_entry->e_id = src_entry->e_id;
1104                         break;
1105                 default:
1106                         goto fail;
1107                 }
1108         }
1109         if (src != end)
1110                 goto fail;
1111         return 0;
1112 fail:
1113         return -EINVAL;
1114 }
1115
/*
 * Name prefix for each supported ext2 xattr name index (e_name_index).
 * Indexes without an entry are NULL and are treated as unsupported by the
 * lookup code.  The table and its strings are read-only.
 */
static const char * const xattr_prefix_table[] = {
	[1] =	"user.",
	[2] =	"system.posix_acl_access",
	[3] =	"system.posix_acl_default",
	[4] =	"trusted.",
	[6] =	"security.",
};
1123
1124 static int copy_single_xattr(struct btrfs_trans_handle *trans,
1125                              struct btrfs_root *root, u64 objectid,
1126                              struct ext2_ext_attr_entry *entry,
1127                              const void *data, u32 datalen)
1128 {
1129         int ret = 0;
1130         int name_len;
1131         int name_index;
1132         void *databuf = NULL;
1133         char namebuf[XATTR_NAME_MAX + 1];
1134
1135         name_index = entry->e_name_index;
1136         if (name_index >= ARRAY_SIZE(xattr_prefix_table) ||
1137             xattr_prefix_table[name_index] == NULL)
1138                 return -EOPNOTSUPP;
1139         name_len = strlen(xattr_prefix_table[name_index]) +
1140                    entry->e_name_len;
1141         if (name_len >= sizeof(namebuf))
1142                 return -ERANGE;
1143
1144         if (name_index == 2 || name_index == 3) {
1145                 size_t bufsize = acl_ea_size(ext2_acl_count(datalen));
1146                 databuf = malloc(bufsize);
1147                 if (!databuf)
1148                        return -ENOMEM;
1149                 ret = ext2_acl_to_xattr(databuf, data, bufsize, datalen);
1150                 if (ret)
1151                         goto out;
1152                 data = databuf;
1153                 datalen = bufsize;
1154         }
1155         strncpy(namebuf, xattr_prefix_table[name_index], XATTR_NAME_MAX);
1156         strncat(namebuf, EXT2_EXT_ATTR_NAME(entry), entry->e_name_len);
1157         if (name_len + datalen > BTRFS_LEAF_DATA_SIZE(root) -
1158             sizeof(struct btrfs_item) - sizeof(struct btrfs_dir_item)) {
1159                 fprintf(stderr, "skip large xattr on inode %Lu name %.*s\n",
1160                         objectid - INO_OFFSET, name_len, namebuf);
1161                 goto out;
1162         }
1163         ret = btrfs_insert_xattr_item(trans, root, namebuf, name_len,
1164                                       data, datalen, objectid);
1165 out:
1166         free(databuf);
1167         return ret;
1168 }
1169
1170 static int copy_extended_attrs(struct btrfs_trans_handle *trans,
1171                                struct btrfs_root *root, u64 objectid,
1172                                struct btrfs_inode_item *btrfs_inode,
1173                                ext2_filsys ext2_fs, ext2_ino_t ext2_ino)
1174 {
1175         int ret = 0;
1176         int inline_ea = 0;
1177         errcode_t err;
1178         u32 datalen;
1179         u32 block_size = ext2_fs->blocksize;
1180         u32 inode_size = EXT2_INODE_SIZE(ext2_fs->super);
1181         struct ext2_inode_large *ext2_inode;
1182         struct ext2_ext_attr_entry *entry;
1183         void *data;
1184         char *buffer = NULL;
1185         char inode_buf[EXT2_GOOD_OLD_INODE_SIZE];
1186
1187         if (inode_size <= EXT2_GOOD_OLD_INODE_SIZE) {
1188                 ext2_inode = (struct ext2_inode_large *)inode_buf;
1189         } else {
1190                 ext2_inode = (struct ext2_inode_large *)malloc(inode_size);
1191                 if (!ext2_inode)
1192                        return -ENOMEM;
1193         }
1194         err = ext2fs_read_inode_full(ext2_fs, ext2_ino, (void *)ext2_inode,
1195                                      inode_size);
1196         if (err) {
1197                 fprintf(stderr, "ext2fs_read_inode_full: %s\n",
1198                         error_message(err));
1199                 ret = -1;
1200                 goto out;
1201         }
1202
1203         if (ext2_ino > ext2_fs->super->s_first_ino &&
1204             inode_size > EXT2_GOOD_OLD_INODE_SIZE) {
1205                 if (EXT2_GOOD_OLD_INODE_SIZE +
1206                     ext2_inode->i_extra_isize > inode_size) {
1207                         ret = -EIO;
1208                         goto out;
1209                 }
1210                 if (ext2_inode->i_extra_isize != 0 &&
1211                     EXT2_XATTR_IHDR(ext2_inode)->h_magic ==
1212                     EXT2_EXT_ATTR_MAGIC) {
1213                         inline_ea = 1;
1214                 }
1215         }
1216         if (inline_ea) {
1217                 int total;
1218                 void *end = (void *)ext2_inode + inode_size;
1219                 entry = EXT2_XATTR_IFIRST(ext2_inode);
1220                 total = end - (void *)entry;
1221                 ret = ext2_xattr_check_names(entry, end);
1222                 if (ret)
1223                         goto out;
1224                 while (!EXT2_EXT_IS_LAST_ENTRY(entry)) {
1225                         ret = ext2_xattr_check_entry(entry, total);
1226                         if (ret)
1227                                 goto out;
1228                         data = (void *)EXT2_XATTR_IFIRST(ext2_inode) +
1229                                 entry->e_value_offs;
1230                         datalen = entry->e_value_size;
1231                         ret = copy_single_xattr(trans, root, objectid,
1232                                                 entry, data, datalen);
1233                         if (ret)
1234                                 goto out;
1235                         entry = EXT2_EXT_ATTR_NEXT(entry);
1236                 }
1237         }
1238
1239         if (ext2_inode->i_file_acl == 0)
1240                 goto out;
1241
1242         buffer = malloc(block_size);
1243         if (!buffer) {
1244                 ret = -ENOMEM;
1245                 goto out;
1246         }
1247         err = ext2fs_read_ext_attr(ext2_fs, ext2_inode->i_file_acl, buffer);
1248         if (err) {
1249                 fprintf(stderr, "ext2fs_read_ext_attr: %s\n",
1250                         error_message(err));
1251                 ret = -1;
1252                 goto out;
1253         }
1254         ret = ext2_xattr_check_block(buffer, block_size);
1255         if (ret)
1256                 goto out;
1257
1258         entry = EXT2_XATTR_BFIRST(buffer);
1259         while (!EXT2_EXT_IS_LAST_ENTRY(entry)) {
1260                 ret = ext2_xattr_check_entry(entry, block_size);
1261                 if (ret)
1262                         goto out;
1263                 data = buffer + entry->e_value_offs;
1264                 datalen = entry->e_value_size;
1265                 ret = copy_single_xattr(trans, root, objectid,
1266                                         entry, data, datalen);
1267                 if (ret)
1268                         goto out;
1269                 entry = EXT2_EXT_ATTR_NEXT(entry);
1270         }
1271 out:
1272         free(buffer);
1273         if ((void *)ext2_inode != inode_buf)
1274                 free(ext2_inode);
1275         return ret;
1276 }
1277 #define MINORBITS       20
1278 #define MKDEV(ma, mi)   (((ma) << MINORBITS) | (mi))
1279
1280 static inline dev_t old_decode_dev(u16 val)
1281 {
1282         return MKDEV((val >> 8) & 255, val & 255);
1283 }
1284
1285 static inline dev_t new_decode_dev(u32 dev)
1286 {
1287         unsigned major = (dev & 0xfff00) >> 8;
1288         unsigned minor = (dev & 0xff) | ((dev >> 12) & 0xfff00);
1289         return MKDEV(major, minor);
1290 }
1291
1292 static int copy_inode_item(struct btrfs_inode_item *dst,
1293                            struct ext2_inode *src, u32 blocksize)
1294 {
1295         btrfs_set_stack_inode_generation(dst, 1);
1296         btrfs_set_stack_inode_sequence(dst, 0);
1297         btrfs_set_stack_inode_transid(dst, 1);
1298         btrfs_set_stack_inode_size(dst, src->i_size);
1299         btrfs_set_stack_inode_nbytes(dst, 0);
1300         btrfs_set_stack_inode_block_group(dst, 0);
1301         btrfs_set_stack_inode_nlink(dst, src->i_links_count);
1302         btrfs_set_stack_inode_uid(dst, src->i_uid | (src->i_uid_high << 16));
1303         btrfs_set_stack_inode_gid(dst, src->i_gid | (src->i_gid_high << 16));
1304         btrfs_set_stack_inode_mode(dst, src->i_mode);
1305         btrfs_set_stack_inode_rdev(dst, 0);
1306         btrfs_set_stack_inode_flags(dst, 0);
1307         btrfs_set_stack_timespec_sec(&dst->atime, src->i_atime);
1308         btrfs_set_stack_timespec_nsec(&dst->atime, 0);
1309         btrfs_set_stack_timespec_sec(&dst->ctime, src->i_ctime);
1310         btrfs_set_stack_timespec_nsec(&dst->ctime, 0);
1311         btrfs_set_stack_timespec_sec(&dst->mtime, src->i_mtime);
1312         btrfs_set_stack_timespec_nsec(&dst->mtime, 0);
1313         btrfs_set_stack_timespec_sec(&dst->otime, 0);
1314         btrfs_set_stack_timespec_nsec(&dst->otime, 0);
1315
1316         if (S_ISDIR(src->i_mode)) {
1317                 btrfs_set_stack_inode_size(dst, 0);
1318                 btrfs_set_stack_inode_nlink(dst, 1);
1319         }
1320         if (S_ISREG(src->i_mode)) {
1321                 btrfs_set_stack_inode_size(dst, (u64)src->i_size_high << 32 |
1322                                            (u64)src->i_size);
1323         }
1324         if (!S_ISREG(src->i_mode) && !S_ISDIR(src->i_mode) &&
1325             !S_ISLNK(src->i_mode)) {
1326                 if (src->i_block[0]) {
1327                         btrfs_set_stack_inode_rdev(dst,
1328                                 old_decode_dev(src->i_block[0]));
1329                 } else {
1330                         btrfs_set_stack_inode_rdev(dst,
1331                                 new_decode_dev(src->i_block[1]));
1332                 }
1333         }
1334         memset(&dst->reserved, 0, sizeof(dst->reserved));
1335
1336         return 0;
1337 }
1338
1339 /*
1340  * copy a single inode. do all the required works, such as cloning
1341  * inode item, creating file extents and creating directory entries.
1342  */
1343 static int copy_single_inode(struct btrfs_trans_handle *trans,
1344                              struct btrfs_root *root, u64 objectid,
1345                              ext2_filsys ext2_fs, ext2_ino_t ext2_ino,
1346                              struct ext2_inode *ext2_inode,
1347                              int datacsum, int packing, int noxattr)
1348 {
1349         int ret;
1350         struct btrfs_inode_item btrfs_inode;
1351
1352         if (ext2_inode->i_links_count == 0)
1353                 return 0;
1354
1355         copy_inode_item(&btrfs_inode, ext2_inode, ext2_fs->blocksize);
1356         if (!datacsum && S_ISREG(ext2_inode->i_mode)) {
1357                 u32 flags = btrfs_stack_inode_flags(&btrfs_inode) |
1358                             BTRFS_INODE_NODATASUM;
1359                 btrfs_set_stack_inode_flags(&btrfs_inode, flags);
1360         }
1361
1362         switch (ext2_inode->i_mode & S_IFMT) {
1363         case S_IFREG:
1364                 ret = create_file_extents(trans, root, objectid, &btrfs_inode,
1365                                         ext2_fs, ext2_ino, datacsum, packing);
1366                 break;
1367         case S_IFDIR:
1368                 ret = create_dir_entries(trans, root, objectid, &btrfs_inode,
1369                                          ext2_fs, ext2_ino);
1370                 break;
1371         case S_IFLNK:
1372                 ret = create_symbol_link(trans, root, objectid, &btrfs_inode,
1373                                          ext2_fs, ext2_ino, ext2_inode);
1374                 break;
1375         default:
1376                 ret = 0;
1377                 break;
1378         }
1379         if (ret)
1380                 return ret;
1381
1382         if (!noxattr) {
1383                 ret = copy_extended_attrs(trans, root, objectid, &btrfs_inode,
1384                                           ext2_fs, ext2_ino);
1385                 if (ret)
1386                         return ret;
1387         }
1388         return btrfs_insert_inode(trans, root, objectid, &btrfs_inode);
1389 }
1390
1391 static int copy_disk_extent(struct btrfs_root *root, u64 dst_bytenr,
1392                             u64 src_bytenr, u32 num_bytes)
1393 {
1394         int ret;
1395         char *buffer;
1396         struct btrfs_fs_devices *fs_devs = root->fs_info->fs_devices;
1397
1398         buffer = malloc(num_bytes);
1399         if (!buffer)
1400                 return -ENOMEM;
1401         ret = pread(fs_devs->latest_bdev, buffer, num_bytes, src_bytenr);
1402         if (ret != num_bytes)
1403                 goto fail;
1404         ret = pwrite(fs_devs->latest_bdev, buffer, num_bytes, dst_bytenr);
1405         if (ret != num_bytes)
1406                 goto fail;
1407         ret = 0;
1408 fail:
1409         free(buffer);
1410         if (ret > 0)
1411                 ret = -1;
1412         return ret;
1413 }
1414 /*
1415  * scan ext2's inode bitmap and copy all used inodes.
1416  */
1417 static int ext2_copy_inodes(struct btrfs_convert_context *cctx,
1418                             struct btrfs_root *root,
1419                             int datacsum, int packing, int noxattr, struct task_ctx *p)
1420 {
1421         ext2_filsys ext2_fs = cctx->fs_data;
1422         int ret;
1423         errcode_t err;
1424         ext2_inode_scan ext2_scan;
1425         struct ext2_inode ext2_inode;
1426         ext2_ino_t ext2_ino;
1427         u64 objectid;
1428         struct btrfs_trans_handle *trans;
1429
1430         trans = btrfs_start_transaction(root, 1);
1431         if (!trans)
1432                 return -ENOMEM;
1433         err = ext2fs_open_inode_scan(ext2_fs, 0, &ext2_scan);
1434         if (err) {
1435                 fprintf(stderr, "ext2fs_open_inode_scan: %s\n", error_message(err));
1436                 return -1;
1437         }
1438         while (!(err = ext2fs_get_next_inode(ext2_scan, &ext2_ino,
1439                                              &ext2_inode))) {
1440                 /* no more inodes */
1441                 if (ext2_ino == 0)
1442                         break;
1443                 /* skip special inode in ext2fs */
1444                 if (ext2_ino < EXT2_GOOD_OLD_FIRST_INO &&
1445                     ext2_ino != EXT2_ROOT_INO)
1446                         continue;
1447                 objectid = ext2_ino + INO_OFFSET;
1448                 ret = copy_single_inode(trans, root,
1449                                         objectid, ext2_fs, ext2_ino,
1450                                         &ext2_inode, datacsum, packing,
1451                                         noxattr);
1452                 p->cur_copy_inodes++;
1453                 if (ret)
1454                         return ret;
1455                 if (trans->blocks_used >= 4096) {
1456                         ret = btrfs_commit_transaction(trans, root);
1457                         BUG_ON(ret);
1458                         trans = btrfs_start_transaction(root, 1);
1459                         BUG_ON(!trans);
1460                 }
1461         }
1462         if (err) {
1463                 fprintf(stderr, "ext2fs_get_next_inode: %s\n", error_message(err));
1464                 return -1;
1465         }
1466         ret = btrfs_commit_transaction(trans, root);
1467         BUG_ON(ret);
1468         ext2fs_close_inode_scan(ext2_scan);
1469
1470         return ret;
1471 }
1472
1473 static int ext2_test_block(struct btrfs_convert_context *cctx, u64 block)
1474 {
1475         ext2_filsys ext2_fs = cctx->fs_data;
1476
1477         BUG_ON(block != (u32)block);
1478         return ext2fs_fast_test_block_bitmap(ext2_fs->block_map, block);
1479 }
1480
1481 /*
1482  * Construct a range of ext2fs image file.
1483  * scan block allocation bitmap, find all blocks used by the ext2fs
1484  * in this range and create file extents that point to these blocks.
1485  *
1486  * Note: Before calling the function, no file extent points to blocks
1487  *       in this range
1488  */
1489 static int create_image_file_range(struct btrfs_trans_handle *trans,
1490                                    struct btrfs_root *root, u64 objectid,
1491                                    struct btrfs_inode_item *inode,
1492                                    u64 start_byte, u64 end_byte,
1493                                    struct btrfs_convert_context *cctx, int datacsum)
1494 {
1495         u32 blocksize = cctx->blocksize;
1496         u32 block = start_byte / blocksize;
1497         u32 last_block = (end_byte + blocksize - 1) / blocksize;
1498         int ret = 0;
1499         struct blk_iterate_data data;
1500
1501         init_blk_iterate_data(&data, trans, root, inode, objectid, datacsum);
1502         data.first_block = block;
1503
1504         for (; start_byte < end_byte; block++, start_byte += blocksize) {
1505                 if (!convert_test_block(cctx, block))
1506                         continue;
1507                 ret = block_iterate_proc(block, block, &data);
1508                 if (ret < 0)
1509                         goto fail;
1510         }
1511         if (data.num_blocks > 0) {
1512                 ret = record_file_blocks(&data, data.first_block,
1513                                          data.disk_block, data.num_blocks);
1514                 if (ret)
1515                         goto fail;
1516                 data.first_block += data.num_blocks;
1517         }
1518         if (last_block > data.first_block) {
1519                 ret = record_file_blocks(&data, data.first_block, 0,
1520                                          last_block - data.first_block);
1521                 if (ret)
1522                         goto fail;
1523         }
1524 fail:
1525         return ret;
1526 }
1527
1528 /*
1529  * Create the fs image file.
1530  */
1531 static int create_image(struct btrfs_convert_context *cctx,
1532                         struct btrfs_root *root, const char *name, int datacsum)
1533 {
1534         int ret;
1535         struct btrfs_key key;
1536         struct btrfs_key location;
1537         struct btrfs_path path;
1538         struct btrfs_inode_item btrfs_inode;
1539         struct btrfs_inode_item *inode_item;
1540         struct extent_buffer *leaf;
1541         struct btrfs_fs_info *fs_info = root->fs_info;
1542         struct btrfs_root *extent_root = fs_info->extent_root;
1543         struct btrfs_trans_handle *trans;
1544         struct btrfs_extent_item *ei;
1545         struct btrfs_extent_inline_ref *iref;
1546         struct btrfs_extent_data_ref *dref;
1547         u64 bytenr;
1548         u64 num_bytes;
1549         u64 objectid;
1550         u64 last_byte;
1551         u64 first_free;
1552         u64 total_bytes;
1553         u64 flags = BTRFS_INODE_READONLY;
1554         u32 sectorsize = root->sectorsize;
1555
1556         total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
1557         first_free =  BTRFS_SUPER_INFO_OFFSET + sectorsize * 2 - 1;
1558         first_free &= ~((u64)sectorsize - 1);
1559         if (!datacsum)
1560                 flags |= BTRFS_INODE_NODATASUM;
1561
1562         memset(&btrfs_inode, 0, sizeof(btrfs_inode));
1563         btrfs_set_stack_inode_generation(&btrfs_inode, 1);
1564         btrfs_set_stack_inode_size(&btrfs_inode, total_bytes);
1565         btrfs_set_stack_inode_nlink(&btrfs_inode, 1);
1566         btrfs_set_stack_inode_nbytes(&btrfs_inode, 0);
1567         btrfs_set_stack_inode_mode(&btrfs_inode, S_IFREG | 0400);
1568         btrfs_set_stack_inode_flags(&btrfs_inode,  flags);
1569         btrfs_init_path(&path);
1570         trans = btrfs_start_transaction(root, 1);
1571         BUG_ON(!trans);
1572
1573         objectid = btrfs_root_dirid(&root->root_item);
1574         ret = btrfs_find_free_objectid(trans, root, objectid, &objectid);
1575         if (ret)
1576                 goto fail;
1577
1578         /*
1579          * copy blocks covered by extent #0 to new positions. extent #0 is
1580          * special, we can't rely on relocate_extents_range to relocate it.
1581          */
1582         for (last_byte = 0; last_byte < first_free; last_byte += sectorsize) {
1583                 ret = custom_alloc_extent(root, sectorsize, 0, &key, 0);
1584                 if (ret)
1585                         goto fail;
1586                 ret = copy_disk_extent(root, key.objectid, last_byte,
1587                                        sectorsize);
1588                 if (ret)
1589                         goto fail;
1590                 ret = btrfs_record_file_extent(trans, root, objectid,
1591                                                &btrfs_inode, last_byte,
1592                                                key.objectid, sectorsize);
1593                 if (ret)
1594                         goto fail;
1595                 if (datacsum) {
1596                         ret = csum_disk_extent(trans, root, key.objectid,
1597                                                sectorsize);
1598                         if (ret)
1599                                 goto fail;
1600                 }
1601         }
1602
1603         while(1) {
1604                 key.objectid = last_byte;
1605                 key.offset = 0;
1606                 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1607                 ret = btrfs_search_slot(trans, fs_info->extent_root,
1608                                         &key, &path, 0, 0);
1609                 if (ret < 0)
1610                         goto fail;
1611 next:
1612                 leaf = path.nodes[0];
1613                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1614                         ret = btrfs_next_leaf(extent_root, &path);
1615                         if (ret < 0)
1616                                 goto fail;
1617                         if (ret > 0)
1618                                 break;
1619                         leaf = path.nodes[0];
1620                 }
1621                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1622                 if (last_byte > key.objectid ||
1623                     key.type != BTRFS_EXTENT_ITEM_KEY) {
1624                         path.slots[0]++;
1625                         goto next;
1626                 }
1627
1628                 bytenr = key.objectid;
1629                 num_bytes = key.offset;
1630                 ei = btrfs_item_ptr(leaf, path.slots[0],
1631                                     struct btrfs_extent_item);
1632                 if (!(btrfs_extent_flags(leaf, ei) & BTRFS_EXTENT_FLAG_DATA)) {
1633                         path.slots[0]++;
1634                         goto next;
1635                 }
1636
1637                 BUG_ON(btrfs_item_size_nr(leaf, path.slots[0]) != sizeof(*ei) +
1638                        btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY));
1639
1640                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
1641                 key.type = btrfs_extent_inline_ref_type(leaf, iref);
1642                 BUG_ON(key.type != BTRFS_EXTENT_DATA_REF_KEY);
1643                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1644                 if (btrfs_extent_data_ref_root(leaf, dref) !=
1645                     BTRFS_FS_TREE_OBJECTID) {
1646                         path.slots[0]++;
1647                         goto next;
1648                 }
1649
1650                 if (bytenr > last_byte) {
1651                         ret = create_image_file_range(trans, root, objectid,
1652                                                       &btrfs_inode, last_byte,
1653                                                       bytenr, cctx,
1654                                                       datacsum);
1655                         if (ret)
1656                                 goto fail;
1657                 }
1658                 ret = btrfs_record_file_extent(trans, root, objectid,
1659                                                &btrfs_inode, bytenr, bytenr,
1660                                                num_bytes);
1661                 if (ret)
1662                         goto fail;
1663                 last_byte = bytenr + num_bytes;
1664                 btrfs_release_path(&path);
1665
1666                 if (trans->blocks_used >= 4096) {
1667                         ret = btrfs_commit_transaction(trans, root);
1668                         BUG_ON(ret);
1669                         trans = btrfs_start_transaction(root, 1);
1670                         BUG_ON(!trans);
1671                 }
1672         }
1673         btrfs_release_path(&path);
1674         if (total_bytes > last_byte) {
1675                 ret = create_image_file_range(trans, root, objectid,
1676                                               &btrfs_inode, last_byte,
1677                                               total_bytes, cctx,
1678                                               datacsum);
1679                 if (ret)
1680                         goto fail;
1681         }
1682
1683         ret = btrfs_insert_inode(trans, root, objectid, &btrfs_inode);
1684         if (ret)
1685                 goto fail;
1686
1687         location.objectid = objectid;
1688         location.offset = 0;
1689         btrfs_set_key_type(&location, BTRFS_INODE_ITEM_KEY);
1690         ret = btrfs_insert_dir_item(trans, root, name, strlen(name),
1691                                     btrfs_root_dirid(&root->root_item),
1692                                     &location, BTRFS_FT_REG_FILE, objectid);
1693         if (ret)
1694                 goto fail;
1695         ret = btrfs_insert_inode_ref(trans, root, name, strlen(name),
1696                                      objectid,
1697                                      btrfs_root_dirid(&root->root_item),
1698                                      objectid);
1699         if (ret)
1700                 goto fail;
1701         location.objectid = btrfs_root_dirid(&root->root_item);
1702         location.offset = 0;
1703         btrfs_set_key_type(&location, BTRFS_INODE_ITEM_KEY);
1704         ret = btrfs_lookup_inode(trans, root, &path, &location, 1);
1705         if (ret)
1706                 goto fail;
1707         leaf = path.nodes[0];
1708         inode_item = btrfs_item_ptr(leaf, path.slots[0],
1709                                     struct btrfs_inode_item);
1710         btrfs_set_inode_size(leaf, inode_item, strlen(name) * 2 +
1711                              btrfs_inode_size(leaf, inode_item));
1712         btrfs_mark_buffer_dirty(leaf);
1713         btrfs_release_path(&path);
1714         ret = btrfs_commit_transaction(trans, root);
1715         BUG_ON(ret);
1716 fail:
1717         btrfs_release_path(&path);
1718         return ret;
1719 }
1720
1721 static int create_image_file_range_v2(struct btrfs_trans_handle *trans,
1722                                       struct btrfs_root *root,
1723                                       struct cache_tree *used,
1724                                       struct btrfs_inode_item *inode,
1725                                       u64 ino, u64 bytenr, u64 *ret_len,
1726                                       int datacsum)
1727 {
1728         struct cache_extent *cache;
1729         struct btrfs_block_group_cache *bg_cache;
1730         u64 len = *ret_len;
1731         u64 disk_bytenr;
1732         int ret;
1733
1734         BUG_ON(bytenr != round_down(bytenr, root->sectorsize));
1735         BUG_ON(len != round_down(len, root->sectorsize));
1736         len = min_t(u64, len, BTRFS_MAX_EXTENT_SIZE);
1737
1738         cache = search_cache_extent(used, bytenr);
1739         if (cache) {
1740                 if (cache->start <= bytenr) {
1741                         /*
1742                          * |///////Used///////|
1743                          *      |<--insert--->|
1744                          *      bytenr
1745                          */
1746                         len = min_t(u64, len, cache->start + cache->size -
1747                                     bytenr);
1748                         disk_bytenr = bytenr;
1749                 } else {
1750                         /*
1751                          *              |//Used//|
1752                          *  |<-insert-->|
1753                          *  bytenr
1754                          */
1755                         len = min(len, cache->start - bytenr);
1756                         disk_bytenr = 0;
1757                         datacsum = 0;
1758                 }
1759         } else {
1760                 /*
1761                  * |//Used//|           |EOF
1762                  *          |<-insert-->|
1763                  *          bytenr
1764                  */
1765                 disk_bytenr = 0;
1766                 datacsum = 0;
1767         }
1768
1769         if (disk_bytenr) {
1770                 /* Check if the range is in a data block group */
1771                 bg_cache = btrfs_lookup_block_group(root->fs_info, bytenr);
1772                 if (!bg_cache)
1773                         return -ENOENT;
1774                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
1775                         return -EINVAL;
1776
1777                 /* The extent should never cross block group boundary */
1778                 len = min_t(u64, len, bg_cache->key.objectid +
1779                             bg_cache->key.offset - bytenr);
1780         }
1781
1782         BUG_ON(len != round_down(len, root->sectorsize));
1783         ret = btrfs_record_file_extent(trans, root, ino, inode, bytenr,
1784                                        disk_bytenr, len);
1785         if (ret < 0)
1786                 return ret;
1787
1788         if (datacsum)
1789                 ret = csum_disk_extent(trans, root, bytenr, len);
1790         *ret_len = len;
1791         return ret;
1792 }
1793
1794
1795 /*
1796  * Relocate old fs data in one reserved ranges
1797  *
1798  * Since all old fs data in reserved range is not covered by any chunk nor
1799  * data extent, we don't need to handle any reference but add new
1800  * extent/reference, which makes codes more clear
1801  */
1802 static int migrate_one_reserved_range(struct btrfs_trans_handle *trans,
1803                                       struct btrfs_root *root,
1804                                       struct cache_tree *used,
1805                                       struct btrfs_inode_item *inode, int fd,
1806                                       u64 ino, u64 start, u64 len, int datacsum)
1807 {
1808         u64 cur_off = start;
1809         u64 cur_len = len;
1810         struct cache_extent *cache;
1811         struct btrfs_key key;
1812         struct extent_buffer *eb;
1813         int ret = 0;
1814
1815         while (cur_off < start + len) {
1816                 cache = lookup_cache_extent(used, cur_off, cur_len);
1817                 if (!cache)
1818                         break;
1819                 cur_off = max(cache->start, cur_off);
1820                 cur_len = min(cache->start + cache->size, start + len) -
1821                           cur_off;
1822                 BUG_ON(cur_len < root->sectorsize);
1823
1824                 /* reserve extent for the data */
1825                 ret = btrfs_reserve_extent(trans, root, cur_len, 0, 0, (u64)-1,
1826                                            &key, 1);
1827                 if (ret < 0)
1828                         break;
1829
1830                 eb = malloc(sizeof(*eb) + cur_len);
1831                 if (!eb) {
1832                         ret = -ENOMEM;
1833                         break;
1834                 }
1835
1836                 ret = pread(fd, eb->data, cur_len, cur_off);
1837                 if (ret < cur_len) {
1838                         ret = (ret < 0 ? ret : -EIO);
1839                         free(eb);
1840                         break;
1841                 }
1842                 eb->start = key.objectid;
1843                 eb->len = key.offset;
1844
1845                 /* Write the data */
1846                 ret = write_and_map_eb(trans, root, eb);
1847                 free(eb);
1848                 if (ret < 0)
1849                         break;
1850
1851                 /* Now handle extent item and file extent things */
1852                 ret = btrfs_record_file_extent(trans, root, ino, inode, cur_off,
1853                                                key.objectid, key.offset);
1854                 if (ret < 0)
1855                         break;
1856                 /* Finally, insert csum items */
1857                 if (datacsum)
1858                         ret = csum_disk_extent(trans, root, key.objectid,
1859                                                key.offset);
1860
1861                 cur_off += key.offset;
1862                 cur_len = start + len - cur_off;
1863         }
1864         return ret;
1865 }
1866
1867 /*
1868  * Relocate the used ext2 data in reserved ranges
1869  * [0,1M)
1870  * [btrfs_sb_offset(1), +BTRFS_STRIPE_LEN)
1871  * [btrfs_sb_offset(2), +BTRFS_STRIPE_LEN)
1872  */
1873 static int migrate_reserved_ranges(struct btrfs_trans_handle *trans,
1874                                    struct btrfs_root *root,
1875                                    struct cache_tree *used,
1876                                    struct btrfs_inode_item *inode, int fd,
1877                                    u64 ino, u64 total_bytes, int datacsum)
1878 {
1879         u64 cur_off;
1880         u64 cur_len;
1881         int ret = 0;
1882
1883         /* 0 ~ 1M */
1884         cur_off = 0;
1885         cur_len = 1024 * 1024;
1886         ret = migrate_one_reserved_range(trans, root, used, inode, fd, ino,
1887                                          cur_off, cur_len, datacsum);
1888         if (ret < 0)
1889                 return ret;
1890
1891         /* second sb(fisrt sb is included in 0~1M) */
1892         cur_off = btrfs_sb_offset(1);
1893         cur_len = min(total_bytes, cur_off + BTRFS_STRIPE_LEN) - cur_off;
1894         if (cur_off < total_bytes)
1895                 return ret;
1896         ret = migrate_one_reserved_range(trans, root, used, inode, fd, ino,
1897                                          cur_off, cur_len, datacsum);
1898         if (ret < 0)
1899                 return ret;
1900
1901         /* Last sb */
1902         cur_off = btrfs_sb_offset(2);
1903         cur_len = min(total_bytes, cur_off + BTRFS_STRIPE_LEN) - cur_off;
1904         if (cur_off < total_bytes)
1905                 return ret;
1906         ret = migrate_one_reserved_range(trans, root, used, inode, fd, ino,
1907                                          cur_off, cur_len, datacsum);
1908         return ret;
1909 }
1910
1911 static int wipe_reserved_ranges(struct cache_tree *tree, u64 min_stripe_size,
1912                                 int ensure_size);
1913
1914 /*
1915  * Create the fs image file of old filesystem.
1916  *
1917  * This is completely fs independent as we have cctx->used, only
1918  * need to create file extents pointing to all the positions.
1919  */
1920 static int create_image_v2(struct btrfs_root *root,
1921                            struct btrfs_mkfs_config *cfg,
1922                            struct btrfs_convert_context *cctx, int fd,
1923                            u64 size, char *name, int datacsum)
1924 {
1925         struct btrfs_inode_item buf;
1926         struct btrfs_trans_handle *trans;
1927         struct btrfs_path *path = NULL;
1928         struct btrfs_key key;
1929         struct cache_extent *cache;
1930         struct cache_tree used_tmp;
1931         u64 cur;
1932         u64 ino;
1933         int ret;
1934
1935         trans = btrfs_start_transaction(root, 1);
1936         if (!trans)
1937                 return -ENOMEM;
1938
1939         cache_tree_init(&used_tmp);
1940
1941         ret = btrfs_find_free_objectid(trans, root, BTRFS_FIRST_FREE_OBJECTID,
1942                                        &ino);
1943         if (ret < 0)
1944                 goto out;
1945         ret = btrfs_new_inode(trans, root, ino, 0600 | S_IFREG);
1946         if (ret < 0)
1947                 goto out;
1948         ret = btrfs_add_link(trans, root, ino, BTRFS_FIRST_FREE_OBJECTID, name,
1949                              strlen(name), BTRFS_FT_REG_FILE, NULL, 1);
1950         if (ret < 0)
1951                 goto out;
1952
1953         path = btrfs_alloc_path();
1954         if (!path) {
1955                 ret = -ENOMEM;
1956                 goto out;
1957         }
1958         key.objectid = ino;
1959         key.type = BTRFS_INODE_ITEM_KEY;
1960         key.offset = 0;
1961
1962         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1963         if (ret) {
1964                 ret = (ret > 0 ? -ENOENT : ret);
1965                 goto out;
1966         }
1967         read_extent_buffer(path->nodes[0], &buf,
1968                         btrfs_item_ptr_offset(path->nodes[0], path->slots[0]),
1969                         sizeof(buf));
1970         btrfs_release_path(path);
1971
1972         /*
1973          * Create a new used space cache, which doesn't contain the reserved
1974          * range
1975          */
1976         for (cache = first_cache_extent(&cctx->used); cache;
1977              cache = next_cache_extent(cache)) {
1978                 ret = add_cache_extent(&used_tmp, cache->start, cache->size);
1979                 if (ret < 0)
1980                         goto out;
1981         }
1982         ret = wipe_reserved_ranges(&used_tmp, 0, 0);
1983         if (ret < 0)
1984                 goto out;
1985
1986         /*
1987          * Start from 1M, as 0~1M is reserved, and create_image_file_range_v2()
1988          * can't handle bytenr 0(will consider it as a hole)
1989          */
1990         cur = 1024 * 1024;
1991         while (cur < size) {
1992                 u64 len = size - cur;
1993
1994                 ret = create_image_file_range_v2(trans, root, &used_tmp,
1995                                                 &buf, ino, cur, &len, datacsum);
1996                 if (ret < 0)
1997                         goto out;
1998                 cur += len;
1999         }
2000         /* Handle the reserved ranges */
2001         ret = migrate_reserved_ranges(trans, root, &cctx->used, &buf, fd, ino,
2002                                       cfg->num_bytes, datacsum);
2003
2004
2005         key.objectid = ino;
2006         key.type = BTRFS_INODE_ITEM_KEY;
2007         key.offset = 0;
2008         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2009         if (ret) {
2010                 ret = (ret > 0 ? -ENOENT : ret);
2011                 goto out;
2012         }
2013         btrfs_set_stack_inode_size(&buf, cfg->num_bytes);
2014         write_extent_buffer(path->nodes[0], &buf,
2015                         btrfs_item_ptr_offset(path->nodes[0], path->slots[0]),
2016                         sizeof(buf));
2017 out:
2018         free_extent_cache_tree(&used_tmp);
2019         btrfs_free_path(path);
2020         btrfs_commit_transaction(trans, root);
2021         return ret;
2022 }
2023
2024 static struct btrfs_root * link_subvol(struct btrfs_root *root,
2025                 const char *base, u64 root_objectid)
2026 {
2027         struct btrfs_trans_handle *trans;
2028         struct btrfs_fs_info *fs_info = root->fs_info;
2029         struct btrfs_root *tree_root = fs_info->tree_root;
2030         struct btrfs_root *new_root = NULL;
2031         struct btrfs_path *path;
2032         struct btrfs_inode_item *inode_item;
2033         struct extent_buffer *leaf;
2034         struct btrfs_key key;
2035         u64 dirid = btrfs_root_dirid(&root->root_item);
2036         u64 index = 2;
2037         char buf[BTRFS_NAME_LEN + 1]; /* for snprintf null */
2038         int len;
2039         int i;
2040         int ret;
2041
2042         len = strlen(base);
2043         if (len == 0 || len > BTRFS_NAME_LEN)
2044                 return NULL;
2045
2046         path = btrfs_alloc_path();
2047         BUG_ON(!path);
2048
2049         key.objectid = dirid;
2050         key.type = BTRFS_DIR_INDEX_KEY;
2051         key.offset = (u64)-1;
2052
2053         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2054         BUG_ON(ret <= 0);
2055
2056         if (path->slots[0] > 0) {
2057                 path->slots[0]--;
2058                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2059                 if (key.objectid == dirid && key.type == BTRFS_DIR_INDEX_KEY)
2060                         index = key.offset + 1;
2061         }
2062         btrfs_release_path(path);
2063
2064         trans = btrfs_start_transaction(root, 1);
2065         BUG_ON(!trans);
2066
2067         key.objectid = dirid;
2068         key.offset = 0;
2069         key.type =  BTRFS_INODE_ITEM_KEY;
2070
2071         ret = btrfs_lookup_inode(trans, root, path, &key, 1);
2072         BUG_ON(ret);
2073         leaf = path->nodes[0];
2074         inode_item = btrfs_item_ptr(leaf, path->slots[0],
2075                                     struct btrfs_inode_item);
2076
2077         key.objectid = root_objectid;
2078         key.offset = (u64)-1;
2079         key.type = BTRFS_ROOT_ITEM_KEY;
2080
2081         memcpy(buf, base, len);
2082         for (i = 0; i < 1024; i++) {
2083                 ret = btrfs_insert_dir_item(trans, root, buf, len,
2084                                             dirid, &key, BTRFS_FT_DIR, index);
2085                 if (ret != -EEXIST)
2086                         break;
2087                 len = snprintf(buf, ARRAY_SIZE(buf), "%s%d", base, i);
2088                 if (len < 1 || len > BTRFS_NAME_LEN) {
2089                         ret = -EINVAL;
2090                         break;
2091                 }
2092         }
2093         if (ret)
2094                 goto fail;
2095
2096         btrfs_set_inode_size(leaf, inode_item, len * 2 +
2097                              btrfs_inode_size(leaf, inode_item));
2098         btrfs_mark_buffer_dirty(leaf);
2099         btrfs_release_path(path);
2100
2101         /* add the backref first */
2102         ret = btrfs_add_root_ref(trans, tree_root, root_objectid,
2103                                  BTRFS_ROOT_BACKREF_KEY,
2104                                  root->root_key.objectid,
2105                                  dirid, index, buf, len);
2106         BUG_ON(ret);
2107
2108         /* now add the forward ref */
2109         ret = btrfs_add_root_ref(trans, tree_root, root->root_key.objectid,
2110                                  BTRFS_ROOT_REF_KEY, root_objectid,
2111                                  dirid, index, buf, len);
2112
2113         ret = btrfs_commit_transaction(trans, root);
2114         BUG_ON(ret);
2115
2116         new_root = btrfs_read_fs_root(fs_info, &key);
2117         if (IS_ERR(new_root))
2118                 new_root = NULL;
2119 fail:
2120         btrfs_free_path(path);
2121         return new_root;
2122 }
2123
2124 static int create_chunk_mapping(struct btrfs_trans_handle *trans,
2125                                 struct btrfs_root *root)
2126 {
2127         struct btrfs_fs_info *info = root->fs_info;
2128         struct btrfs_root *chunk_root = info->chunk_root;
2129         struct btrfs_root *extent_root = info->extent_root;
2130         struct btrfs_device *device;
2131         struct btrfs_block_group_cache *cache;
2132         struct btrfs_dev_extent *extent;
2133         struct extent_buffer *leaf;
2134         struct btrfs_chunk chunk;
2135         struct btrfs_key key;
2136         struct btrfs_path path;
2137         u64 cur_start;
2138         u64 total_bytes;
2139         u64 chunk_objectid;
2140         int ret;
2141
2142         btrfs_init_path(&path);
2143
2144         total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
2145         chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
2146
2147         BUG_ON(list_empty(&info->fs_devices->devices));
2148         device = list_entry(info->fs_devices->devices.next,
2149                             struct btrfs_device, dev_list);
2150         BUG_ON(device->devid != info->fs_devices->latest_devid);
2151
2152         /* delete device extent created by make_btrfs */
2153         key.objectid = device->devid;
2154         key.offset = 0;
2155         key.type = BTRFS_DEV_EXTENT_KEY;
2156         ret = btrfs_search_slot(trans, device->dev_root, &key, &path, -1, 1);
2157         if (ret < 0)
2158                 goto err;
2159
2160         BUG_ON(ret > 0);
2161         ret = btrfs_del_item(trans, device->dev_root, &path);
2162         if (ret)
2163                 goto err;
2164         btrfs_release_path(&path);
2165
2166         /* delete chunk item created by make_btrfs */
2167         key.objectid = chunk_objectid;
2168         key.offset = 0;
2169         key.type = BTRFS_CHUNK_ITEM_KEY;
2170         ret = btrfs_search_slot(trans, chunk_root, &key, &path, -1, 1);
2171         if (ret < 0)
2172                 goto err;
2173
2174         BUG_ON(ret > 0);
2175         ret = btrfs_del_item(trans, chunk_root, &path);
2176         if (ret)
2177                 goto err;
2178         btrfs_release_path(&path);
2179
2180         /* for each block group, create device extent and chunk item */
2181         cur_start = 0;
2182         while (cur_start < total_bytes) {
2183                 cache = btrfs_lookup_block_group(root->fs_info, cur_start);
2184                 BUG_ON(!cache);
2185
2186                 /* insert device extent */
2187                 key.objectid = device->devid;
2188                 key.offset = cache->key.objectid;
2189                 key.type = BTRFS_DEV_EXTENT_KEY;
2190                 ret = btrfs_insert_empty_item(trans, device->dev_root, &path,
2191                                               &key, sizeof(*extent));
2192                 if (ret)
2193                         goto err;
2194
2195                 leaf = path.nodes[0];
2196                 extent = btrfs_item_ptr(leaf, path.slots[0],
2197                                         struct btrfs_dev_extent);
2198
2199                 btrfs_set_dev_extent_chunk_tree(leaf, extent,
2200                                                 chunk_root->root_key.objectid);
2201                 btrfs_set_dev_extent_chunk_objectid(leaf, extent,
2202                                                     chunk_objectid);
2203                 btrfs_set_dev_extent_chunk_offset(leaf, extent,
2204                                                   cache->key.objectid);
2205                 btrfs_set_dev_extent_length(leaf, extent, cache->key.offset);
2206                 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
2207                     (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
2208                     BTRFS_UUID_SIZE);
2209                 btrfs_mark_buffer_dirty(leaf);
2210                 btrfs_release_path(&path);
2211
2212                 /* insert chunk item */
2213                 btrfs_set_stack_chunk_length(&chunk, cache->key.offset);
2214                 btrfs_set_stack_chunk_owner(&chunk,
2215                                             extent_root->root_key.objectid);
2216                 btrfs_set_stack_chunk_stripe_len(&chunk, BTRFS_STRIPE_LEN);
2217                 btrfs_set_stack_chunk_type(&chunk, cache->flags);
2218                 btrfs_set_stack_chunk_io_align(&chunk, device->io_align);
2219                 btrfs_set_stack_chunk_io_width(&chunk, device->io_width);
2220                 btrfs_set_stack_chunk_sector_size(&chunk, device->sector_size);
2221                 btrfs_set_stack_chunk_num_stripes(&chunk, 1);
2222                 btrfs_set_stack_chunk_sub_stripes(&chunk, 0);
2223                 btrfs_set_stack_stripe_devid(&chunk.stripe, device->devid);
2224                 btrfs_set_stack_stripe_offset(&chunk.stripe,
2225                                               cache->key.objectid);
2226                 memcpy(&chunk.stripe.dev_uuid, device->uuid, BTRFS_UUID_SIZE);
2227
2228                 key.objectid = chunk_objectid;
2229                 key.offset = cache->key.objectid;
2230                 key.type = BTRFS_CHUNK_ITEM_KEY;
2231
2232                 ret = btrfs_insert_item(trans, chunk_root, &key, &chunk,
2233                                         btrfs_chunk_item_size(1));
2234                 if (ret)
2235                         goto err;
2236
2237                 cur_start = cache->key.objectid + cache->key.offset;
2238         }
2239
2240         device->bytes_used = total_bytes;
2241         ret = btrfs_update_device(trans, device);
2242 err:
2243         btrfs_release_path(&path);
2244         return ret;
2245 }
2246
2247 static int create_subvol(struct btrfs_trans_handle *trans,
2248                          struct btrfs_root *root, u64 root_objectid)
2249 {
2250         struct extent_buffer *tmp;
2251         struct btrfs_root *new_root;
2252         struct btrfs_key key;
2253         struct btrfs_root_item root_item;
2254         int ret;
2255
2256         ret = btrfs_copy_root(trans, root, root->node, &tmp,
2257                               root_objectid);
2258         BUG_ON(ret);
2259
2260         memcpy(&root_item, &root->root_item, sizeof(root_item));
2261         btrfs_set_root_bytenr(&root_item, tmp->start);
2262         btrfs_set_root_level(&root_item, btrfs_header_level(tmp));
2263         btrfs_set_root_generation(&root_item, trans->transid);
2264         free_extent_buffer(tmp);
2265
2266         key.objectid = root_objectid;
2267         key.type = BTRFS_ROOT_ITEM_KEY;
2268         key.offset = trans->transid;
2269         ret = btrfs_insert_root(trans, root->fs_info->tree_root,
2270                                 &key, &root_item);
2271
2272         key.offset = (u64)-1;
2273         new_root = btrfs_read_fs_root(root->fs_info, &key);
2274         BUG_ON(!new_root || IS_ERR(new_root));
2275
2276         ret = btrfs_make_root_dir(trans, new_root, BTRFS_FIRST_FREE_OBJECTID);
2277         BUG_ON(ret);
2278
2279         return 0;
2280 }
2281
2282 /*
2283  * New make_btrfs_v2() has handle system and meta chunks quite well.
2284  * So only need to add remaining data chunks.
2285  */
2286 static int make_convert_data_block_groups(struct btrfs_trans_handle *trans,
2287                                           struct btrfs_fs_info *fs_info,
2288                                           struct btrfs_mkfs_config *cfg,
2289                                           struct btrfs_convert_context *cctx)
2290 {
2291         struct btrfs_root *extent_root = fs_info->extent_root;
2292         struct cache_tree *data_chunks = &cctx->data_chunks;
2293         struct cache_extent *cache;
2294         u64 max_chunk_size;
2295         int ret = 0;
2296
2297         /*
2298          * Don't create data chunk over 10% of the convert device
2299          * And for single chunk, don't create chunk larger than 1G.
2300          */
2301         max_chunk_size = cfg->num_bytes / 10;
2302         max_chunk_size = min((u64)(1024 * 1024 * 1024), max_chunk_size);
2303         max_chunk_size = round_down(max_chunk_size, extent_root->sectorsize);
2304
2305         for (cache = first_cache_extent(data_chunks); cache;
2306              cache = next_cache_extent(cache)) {
2307                 u64 cur = cache->start;
2308
2309                 while (cur < cache->start + cache->size) {
2310                         u64 len;
2311                         u64 cur_backup = cur;
2312
2313                         len = min(max_chunk_size,
2314                                   cache->start + cache->size - cur);
2315                         ret = btrfs_alloc_data_chunk(trans, extent_root,
2316                                         &cur_backup, len,
2317                                         BTRFS_BLOCK_GROUP_DATA, 1);
2318                         if (ret < 0)
2319                                 break;
2320                         ret = btrfs_make_block_group(trans, extent_root, 0,
2321                                         BTRFS_BLOCK_GROUP_DATA,
2322                                         BTRFS_FIRST_CHUNK_TREE_OBJECTID,
2323                                         cur, len);
2324                         if (ret < 0)
2325                                 break;
2326                         cur += len;
2327                 }
2328         }
2329         return ret;
2330 }
2331
2332 /*
2333  * Init the temp btrfs to a operational status.
2334  *
2335  * It will fix the extent usage accounting(XXX: Do we really need?) and
2336  * insert needed data chunks, to ensure all old fs data extents are covered
2337  * by DATA chunks, preventing wrong chunks are allocated.
2338  *
2339  * And also create convert image subvolume and relocation tree.
2340  * (XXX: Not need again?)
2341  * But the convert image subvolume is *NOT* linked to fs tree yet.
2342  */
2343 static int init_btrfs_v2(struct btrfs_mkfs_config *cfg, struct btrfs_root *root,
2344                          struct btrfs_convert_context *cctx, int datacsum,
2345                          int packing, int noxattr)
2346 {
2347         struct btrfs_key location;
2348         struct btrfs_trans_handle *trans;
2349         struct btrfs_fs_info *fs_info = root->fs_info;
2350         int ret;
2351
2352         /*
2353          * Don't alloc any metadata/system chunk, as we don't want
2354          * any meta/sys chunk allcated before all data chunks are inserted.
2355          * Or we screw up the chunk layout just like the old implement.
2356          */
2357         fs_info->avoid_sys_chunk_alloc = 1;
2358         fs_info->avoid_meta_chunk_alloc = 1;
2359         trans = btrfs_start_transaction(root, 1);
2360         BUG_ON(!trans);
2361         ret = btrfs_fix_block_accounting(trans, root);
2362         if (ret)
2363                 goto err;
2364         ret = make_convert_data_block_groups(trans, fs_info, cfg, cctx);
2365         if (ret)
2366                 goto err;
2367         ret = btrfs_make_root_dir(trans, fs_info->tree_root,
2368                                   BTRFS_ROOT_TREE_DIR_OBJECTID);
2369         if (ret)
2370                 goto err;
2371         memcpy(&location, &root->root_key, sizeof(location));
2372         location.offset = (u64)-1;
2373         ret = btrfs_insert_dir_item(trans, fs_info->tree_root, "default", 7,
2374                                 btrfs_super_root_dir(fs_info->super_copy),
2375                                 &location, BTRFS_FT_DIR, 0);
2376         if (ret)
2377                 goto err;
2378         ret = btrfs_insert_inode_ref(trans, fs_info->tree_root, "default", 7,
2379                                 location.objectid,
2380                                 btrfs_super_root_dir(fs_info->super_copy), 0);
2381         if (ret)
2382                 goto err;
2383         btrfs_set_root_dirid(&fs_info->fs_root->root_item,
2384                              BTRFS_FIRST_FREE_OBJECTID);
2385
2386         /* subvol for fs image file */
2387         ret = create_subvol(trans, root, CONV_IMAGE_SUBVOL_OBJECTID);
2388         if (ret < 0)
2389                 goto err;
2390         /* subvol for data relocation tree */
2391         ret = create_subvol(trans, root, BTRFS_DATA_RELOC_TREE_OBJECTID);
2392         if (ret < 0)
2393                 goto err;
2394
2395         ret = btrfs_commit_transaction(trans, root);
2396         fs_info->avoid_sys_chunk_alloc = 0;
2397         fs_info->avoid_meta_chunk_alloc = 0;
2398 err:
2399         return ret;
2400 }
2401
2402 static int init_btrfs(struct btrfs_root *root)
2403 {
2404         int ret;
2405         struct btrfs_key location;
2406         struct btrfs_trans_handle *trans;
2407         struct btrfs_fs_info *fs_info = root->fs_info;
2408         struct extent_buffer *tmp;
2409
2410         trans = btrfs_start_transaction(root, 1);
2411         BUG_ON(!trans);
2412         ret = btrfs_make_block_groups(trans, root);
2413         if (ret)
2414                 goto err;
2415         ret = btrfs_fix_block_accounting(trans, root);
2416         if (ret)
2417                 goto err;
2418         ret = create_chunk_mapping(trans, root);
2419         if (ret)
2420                 goto err;
2421         ret = btrfs_make_root_dir(trans, fs_info->tree_root,
2422                                   BTRFS_ROOT_TREE_DIR_OBJECTID);
2423         if (ret)
2424                 goto err;
2425         memcpy(&location, &root->root_key, sizeof(location));
2426         location.offset = (u64)-1;
2427         ret = btrfs_insert_dir_item(trans, fs_info->tree_root, "default", 7,
2428                                 btrfs_super_root_dir(fs_info->super_copy),
2429                                 &location, BTRFS_FT_DIR, 0);
2430         if (ret)
2431                 goto err;
2432         ret = btrfs_insert_inode_ref(trans, fs_info->tree_root, "default", 7,
2433                                 location.objectid,
2434                                 btrfs_super_root_dir(fs_info->super_copy), 0);
2435         if (ret)
2436                 goto err;
2437         btrfs_set_root_dirid(&fs_info->fs_root->root_item,
2438                              BTRFS_FIRST_FREE_OBJECTID);
2439
2440         /* subvol for fs image file */
2441         ret = create_subvol(trans, root, CONV_IMAGE_SUBVOL_OBJECTID);
2442         BUG_ON(ret);
2443         /* subvol for data relocation */
2444         ret = create_subvol(trans, root, BTRFS_DATA_RELOC_TREE_OBJECTID);
2445         BUG_ON(ret);
2446
2447         extent_buffer_get(fs_info->csum_root->node);
2448         ret = __btrfs_cow_block(trans, fs_info->csum_root,
2449                                 fs_info->csum_root->node, NULL, 0, &tmp, 0, 0);
2450         BUG_ON(ret);
2451         free_extent_buffer(tmp);
2452
2453         ret = btrfs_commit_transaction(trans, root);
2454         BUG_ON(ret);
2455 err:
2456         return ret;
2457 }
2458
2459 /*
2460  * Migrate super block to its default position and zero 0 ~ 16k
2461  */
2462 static int migrate_super_block(int fd, u64 old_bytenr, u32 sectorsize)
2463 {
2464         int ret;
2465         struct extent_buffer *buf;
2466         struct btrfs_super_block *super;
2467         u32 len;
2468         u32 bytenr;
2469
2470         BUG_ON(sectorsize < sizeof(*super));
2471         buf = malloc(sizeof(*buf) + sectorsize);
2472         if (!buf)
2473                 return -ENOMEM;
2474
2475         buf->len = sectorsize;
2476         ret = pread(fd, buf->data, sectorsize, old_bytenr);
2477         if (ret != sectorsize)
2478                 goto fail;
2479
2480         super = (struct btrfs_super_block *)buf->data;
2481         BUG_ON(btrfs_super_bytenr(super) != old_bytenr);
2482         btrfs_set_super_bytenr(super, BTRFS_SUPER_INFO_OFFSET);
2483
2484         csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
2485         ret = pwrite(fd, buf->data, sectorsize, BTRFS_SUPER_INFO_OFFSET);
2486         if (ret != sectorsize)
2487                 goto fail;
2488
2489         ret = fsync(fd);
2490         if (ret)
2491                 goto fail;
2492
2493         memset(buf->data, 0, sectorsize);
2494         for (bytenr = 0; bytenr < BTRFS_SUPER_INFO_OFFSET; ) {
2495                 len = BTRFS_SUPER_INFO_OFFSET - bytenr;
2496                 if (len > sectorsize)
2497                         len = sectorsize;
2498                 ret = pwrite(fd, buf->data, len, bytenr);
2499                 if (ret != len) {
2500                         fprintf(stderr, "unable to zero fill device\n");
2501                         break;
2502                 }
2503                 bytenr += len;
2504         }
2505         ret = 0;
2506         fsync(fd);
2507 fail:
2508         free(buf);
2509         if (ret > 0)
2510                 ret = -1;
2511         return ret;
2512 }
2513
2514 static int prepare_system_chunk_sb(struct btrfs_super_block *super)
2515 {
2516         struct btrfs_chunk *chunk;
2517         struct btrfs_disk_key *key;
2518         u32 sectorsize = btrfs_super_sectorsize(super);
2519
2520         key = (struct btrfs_disk_key *)(super->sys_chunk_array);
2521         chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
2522                                        sizeof(struct btrfs_disk_key));
2523
2524         btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
2525         btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
2526         btrfs_set_disk_key_offset(key, 0);
2527
2528         btrfs_set_stack_chunk_length(chunk, btrfs_super_total_bytes(super));
2529         btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
2530         btrfs_set_stack_chunk_stripe_len(chunk, BTRFS_STRIPE_LEN);
2531         btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
2532         btrfs_set_stack_chunk_io_align(chunk, sectorsize);
2533         btrfs_set_stack_chunk_io_width(chunk, sectorsize);
2534         btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
2535         btrfs_set_stack_chunk_num_stripes(chunk, 1);
2536         btrfs_set_stack_chunk_sub_stripes(chunk, 0);
2537         chunk->stripe.devid = super->dev_item.devid;
2538         btrfs_set_stack_stripe_offset(&chunk->stripe, 0);
2539         memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
2540         btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
2541         return 0;
2542 }
2543
2544 static int prepare_system_chunk(int fd, u64 sb_bytenr)
2545 {
2546         int ret;
2547         struct extent_buffer *buf;
2548         struct btrfs_super_block *super;
2549
2550         BUG_ON(BTRFS_SUPER_INFO_SIZE < sizeof(*super));
2551         buf = malloc(sizeof(*buf) + BTRFS_SUPER_INFO_SIZE);
2552         if (!buf)
2553                 return -ENOMEM;
2554
2555         buf->len = BTRFS_SUPER_INFO_SIZE;
2556         ret = pread(fd, buf->data, BTRFS_SUPER_INFO_SIZE, sb_bytenr);
2557         if (ret != BTRFS_SUPER_INFO_SIZE)
2558                 goto fail;
2559
2560         super = (struct btrfs_super_block *)buf->data;
2561         BUG_ON(btrfs_super_bytenr(super) != sb_bytenr);
2562         BUG_ON(btrfs_super_num_devices(super) != 1);
2563
2564         ret = prepare_system_chunk_sb(super);
2565         if (ret)
2566                 goto fail;
2567
2568         csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
2569         ret = pwrite(fd, buf->data, BTRFS_SUPER_INFO_SIZE, sb_bytenr);
2570         if (ret != BTRFS_SUPER_INFO_SIZE)
2571                 goto fail;
2572
2573         ret = 0;
2574 fail:
2575         free(buf);
2576         if (ret > 0)
2577                 ret = -1;
2578         return ret;
2579 }
2580
2581 static int relocate_one_reference(struct btrfs_trans_handle *trans,
2582                                   struct btrfs_root *root,
2583                                   u64 extent_start, u64 extent_size,
2584                                   struct btrfs_key *extent_key,
2585                                   struct extent_io_tree *reloc_tree)
2586 {
2587         struct extent_buffer *leaf;
2588         struct btrfs_file_extent_item *fi;
2589         struct btrfs_key key;
2590         struct btrfs_path path;
2591         struct btrfs_inode_item inode;
2592         struct blk_iterate_data data;
2593         u64 bytenr;
2594         u64 num_bytes;
2595         u64 cur_offset;
2596         u64 new_pos;
2597         u64 nbytes;
2598         u64 sector_end;
2599         u32 sectorsize = root->sectorsize;
2600         unsigned long ptr;
2601         int datacsum;
2602         int fd;
2603         int ret;
2604
2605         btrfs_init_path(&path);
2606         ret = btrfs_search_slot(trans, root, extent_key, &path, -1, 1);
2607         if (ret)
2608                 goto fail;
2609
2610         leaf = path.nodes[0];
2611         fi = btrfs_item_ptr(leaf, path.slots[0],
2612                             struct btrfs_file_extent_item);
2613         BUG_ON(btrfs_file_extent_offset(leaf, fi) > 0);
2614         if (extent_start != btrfs_file_extent_disk_bytenr(leaf, fi) ||
2615             extent_size != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
2616                 ret = 1;
2617                 goto fail;
2618         }
2619
2620         bytenr = extent_start + btrfs_file_extent_offset(leaf, fi);
2621         num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
2622
2623         ret = btrfs_del_item(trans, root, &path);
2624         if (ret)
2625                 goto fail;
2626
2627         ret = btrfs_free_extent(trans, root, extent_start, extent_size, 0,
2628                                 root->root_key.objectid,
2629                                 extent_key->objectid, extent_key->offset);
2630         if (ret)
2631                 goto fail;
2632
2633         btrfs_release_path(&path);
2634
2635         key.objectid = extent_key->objectid;
2636         key.offset = 0;
2637         key.type =  BTRFS_INODE_ITEM_KEY;
2638         ret = btrfs_lookup_inode(trans, root, &path, &key, 0);
2639         if (ret)
2640                 goto fail;
2641
2642         leaf = path.nodes[0];
2643         ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
2644         read_extent_buffer(leaf, &inode, ptr, sizeof(inode));
2645         btrfs_release_path(&path);
2646
2647         BUG_ON(num_bytes & (sectorsize - 1));
2648         nbytes = btrfs_stack_inode_nbytes(&inode) - num_bytes;
2649         btrfs_set_stack_inode_nbytes(&inode, nbytes);
2650         datacsum = !(btrfs_stack_inode_flags(&inode) & BTRFS_INODE_NODATASUM);
2651
2652         init_blk_iterate_data(&data, trans, root, &inode, extent_key->objectid,
2653                               datacsum);
2654         data.first_block = extent_key->offset;
2655
2656         cur_offset = extent_key->offset;
2657         while (num_bytes > 0) {
2658                 sector_end = bytenr + sectorsize - 1;
2659                 if (test_range_bit(reloc_tree, bytenr, sector_end,
2660                                    EXTENT_LOCKED, 1)) {
2661                         ret = get_state_private(reloc_tree, bytenr, &new_pos);
2662                         BUG_ON(ret);
2663                 } else {
2664                         ret = custom_alloc_extent(root, sectorsize, 0, &key, 0);
2665                         if (ret)
2666                                 goto fail;
2667                         new_pos = key.objectid;
2668
2669                         if (cur_offset == extent_key->offset) {
2670                                 fd = root->fs_info->fs_devices->latest_bdev;
2671                                 readahead(fd, bytenr, num_bytes);
2672                         }
2673                         ret = copy_disk_extent(root, new_pos, bytenr,
2674                                                sectorsize);
2675                         if (ret)
2676                                 goto fail;
2677                         ret = set_extent_bits(reloc_tree, bytenr, sector_end,
2678                                               EXTENT_LOCKED, GFP_NOFS);
2679                         BUG_ON(ret);
2680                         ret = set_state_private(reloc_tree, bytenr, new_pos);
2681                         BUG_ON(ret);
2682                 }
2683
2684                 ret = block_iterate_proc(new_pos / sectorsize,
2685                                          cur_offset / sectorsize, &data);
2686                 if (ret < 0)
2687                         goto fail;
2688
2689                 cur_offset += sectorsize;
2690                 bytenr += sectorsize;
2691                 num_bytes -= sectorsize;
2692         }
2693
2694         if (data.num_blocks > 0) {
2695                 ret = record_file_blocks(&data, data.first_block,
2696                                          data.disk_block, data.num_blocks);
2697                 if (ret)
2698                         goto fail;
2699         }
2700
2701         key.objectid = extent_key->objectid;
2702         key.offset = 0;
2703         key.type =  BTRFS_INODE_ITEM_KEY;
2704         ret = btrfs_lookup_inode(trans, root, &path, &key, 1);
2705         if (ret)
2706                 goto fail;
2707
2708         leaf = path.nodes[0];
2709         ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
2710         write_extent_buffer(leaf, &inode, ptr, sizeof(inode));
2711         btrfs_mark_buffer_dirty(leaf);
2712         btrfs_release_path(&path);
2713
2714 fail:
2715         btrfs_release_path(&path);
2716         return ret;
2717 }
2718
2719 static int relocate_extents_range(struct btrfs_root *fs_root,
2720                                   struct btrfs_root *image_root,
2721                                   u64 start_byte, u64 end_byte)
2722 {
2723         struct btrfs_fs_info *info = fs_root->fs_info;
2724         struct btrfs_root *extent_root = info->extent_root;
2725         struct btrfs_root *cur_root = NULL;
2726         struct btrfs_trans_handle *trans;
2727         struct btrfs_extent_data_ref *dref;
2728         struct btrfs_extent_inline_ref *iref;
2729         struct btrfs_extent_item *ei;
2730         struct extent_buffer *leaf;
2731         struct btrfs_key key;
2732         struct btrfs_key extent_key;
2733         struct btrfs_path path;
2734         struct extent_io_tree reloc_tree;
2735         unsigned long ptr;
2736         unsigned long end;
2737         u64 cur_byte;
2738         u64 num_bytes;
2739         u64 ref_root;
2740         u64 num_extents;
2741         int pass = 0;
2742         int ret;
2743
2744         btrfs_init_path(&path);
2745         extent_io_tree_init(&reloc_tree);
2746
2747         key.objectid = start_byte;
2748         key.offset = 0;
2749         key.type = BTRFS_EXTENT_ITEM_KEY;
2750         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2751         if (ret < 0)
2752                 goto fail;
2753         if (ret > 0) {
2754                 ret = btrfs_previous_item(extent_root, &path, 0,
2755                                           BTRFS_EXTENT_ITEM_KEY);
2756                 if (ret < 0)
2757                         goto fail;
2758                 if (ret == 0) {
2759                         leaf = path.nodes[0];
2760                         btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2761                         if (key.objectid + key.offset > start_byte)
2762                                 start_byte = key.objectid;
2763                 }
2764         }
2765         btrfs_release_path(&path);
2766 again:
2767         cur_root = (pass % 2 == 0) ? image_root : fs_root;
2768         num_extents = 0;
2769
2770         trans = btrfs_start_transaction(cur_root, 1);
2771         BUG_ON(!trans);
2772
2773         cur_byte = start_byte;
2774         while (1) {
2775                 key.objectid = cur_byte;
2776                 key.offset = 0;
2777                 key.type = BTRFS_EXTENT_ITEM_KEY;
2778                 ret = btrfs_search_slot(trans, extent_root,
2779                                         &key, &path, 0, 0);
2780                 if (ret < 0)
2781                         goto fail;
2782 next:
2783                 leaf = path.nodes[0];
2784                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2785                         ret = btrfs_next_leaf(extent_root, &path);
2786                         if (ret < 0)
2787                                 goto fail;
2788                         if (ret > 0)
2789                                 break;
2790                         leaf = path.nodes[0];
2791                 }
2792
2793                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2794                 if (key.objectid < cur_byte ||
2795                     key.type != BTRFS_EXTENT_ITEM_KEY) {
2796                         path.slots[0]++;
2797                         goto next;
2798                 }
2799                 if (key.objectid >= end_byte)
2800                         break;
2801
2802                 num_extents++;
2803
2804                 cur_byte = key.objectid;
2805                 num_bytes = key.offset;
2806                 ei = btrfs_item_ptr(leaf, path.slots[0],
2807                                     struct btrfs_extent_item);
2808                 BUG_ON(!(btrfs_extent_flags(leaf, ei) &
2809                          BTRFS_EXTENT_FLAG_DATA));
2810
2811                 ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
2812                 end = ptr + btrfs_item_size_nr(leaf, path.slots[0]);
2813
2814                 ptr += sizeof(struct btrfs_extent_item);
2815
2816                 while (ptr < end) {
2817                         iref = (struct btrfs_extent_inline_ref *)ptr;
2818                         key.type = btrfs_extent_inline_ref_type(leaf, iref);
2819                         BUG_ON(key.type != BTRFS_EXTENT_DATA_REF_KEY);
2820                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
2821                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
2822                         extent_key.objectid =
2823                                 btrfs_extent_data_ref_objectid(leaf, dref);
2824                         extent_key.offset =
2825                                 btrfs_extent_data_ref_offset(leaf, dref);
2826                         extent_key.type = BTRFS_EXTENT_DATA_KEY;
2827                         BUG_ON(btrfs_extent_data_ref_count(leaf, dref) != 1);
2828
2829                         if (ref_root == cur_root->root_key.objectid)
2830                                 break;
2831
2832                         ptr += btrfs_extent_inline_ref_size(key.type);
2833                 }
2834
2835                 if (ptr >= end) {
2836                         path.slots[0]++;
2837                         goto next;
2838                 }
2839
2840                 ret = relocate_one_reference(trans, cur_root, cur_byte,
2841                                              num_bytes, &extent_key,
2842                                              &reloc_tree);
2843                 if (ret < 0)
2844                         goto fail;
2845
2846                 cur_byte += num_bytes;
2847                 btrfs_release_path(&path);
2848
2849                 if (trans->blocks_used >= 4096) {
2850                         ret = btrfs_commit_transaction(trans, cur_root);
2851                         BUG_ON(ret);
2852                         trans = btrfs_start_transaction(cur_root, 1);
2853                         BUG_ON(!trans);
2854                 }
2855         }
2856         btrfs_release_path(&path);
2857
2858         ret = btrfs_commit_transaction(trans, cur_root);
2859         BUG_ON(ret);
2860
2861         if (num_extents > 0 && pass++ < 16)
2862                 goto again;
2863
2864         ret = (num_extents > 0) ? -1 : 0;
2865 fail:
2866         btrfs_release_path(&path);
2867         extent_io_tree_cleanup(&reloc_tree);
2868         return ret;
2869 }
2870
2871 /*
2872  * relocate data in system chunk
2873  */
2874 static int cleanup_sys_chunk(struct btrfs_root *fs_root,
2875                              struct btrfs_root *image_root)
2876 {
2877         struct btrfs_block_group_cache *cache;
2878         int i, ret = 0;
2879         u64 offset = 0;
2880         u64 end_byte;
2881
2882         while(1) {
2883                 cache = btrfs_lookup_block_group(fs_root->fs_info, offset);
2884                 if (!cache)
2885                         break;
2886
2887                 end_byte = cache->key.objectid + cache->key.offset;
2888                 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
2889                         ret = relocate_extents_range(fs_root, image_root,
2890                                                      cache->key.objectid,
2891                                                      end_byte);
2892                         if (ret)
2893                                 goto fail;
2894                 }
2895                 offset = end_byte;
2896         }
2897         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2898                 offset = btrfs_sb_offset(i);
2899                 offset &= ~((u64)BTRFS_STRIPE_LEN - 1);
2900
2901                 ret = relocate_extents_range(fs_root, image_root,
2902                                              offset, offset + BTRFS_STRIPE_LEN);
2903                 if (ret)
2904                         goto fail;
2905         }
2906         ret = 0;
2907 fail:
2908         return ret;
2909 }
2910
2911 static int fixup_chunk_mapping(struct btrfs_root *root)
2912 {
2913         struct btrfs_trans_handle *trans;
2914         struct btrfs_fs_info *info = root->fs_info;
2915         struct btrfs_root *chunk_root = info->chunk_root;
2916         struct extent_buffer *leaf;
2917         struct btrfs_key key;
2918         struct btrfs_path path;
2919         struct btrfs_chunk chunk;
2920         unsigned long ptr;
2921         u32 size;
2922         u64 type;
2923         int ret;
2924
2925         btrfs_init_path(&path);
2926
2927         trans = btrfs_start_transaction(root, 1);
2928         BUG_ON(!trans);
2929
2930         /*
2931          * recow the whole chunk tree. this will move all chunk tree blocks
2932          * into system block group.
2933          */
2934         memset(&key, 0, sizeof(key));
2935         while (1) {
2936                 ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 1);
2937                 if (ret < 0)
2938                         goto err;
2939
2940                 ret = btrfs_next_leaf(chunk_root, &path);
2941                 if (ret < 0)
2942                         goto err;
2943                 if (ret > 0)
2944                         break;
2945
2946                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
2947                 btrfs_release_path(&path);
2948         }
2949         btrfs_release_path(&path);
2950
2951         /* fixup the system chunk array in super block */
2952         btrfs_set_super_sys_array_size(info->super_copy, 0);
2953
2954         key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
2955         key.offset = 0;
2956         key.type = BTRFS_CHUNK_ITEM_KEY;
2957
2958         ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 0);
2959         if (ret < 0)
2960                 goto err;
2961         BUG_ON(ret != 0);
2962         while(1) {
2963                 leaf = path.nodes[0];
2964                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2965                         ret = btrfs_next_leaf(chunk_root, &path);
2966                         if (ret < 0)
2967                                 goto err;
2968                         if (ret > 0)
2969                                 break;
2970                         leaf = path.nodes[0];
2971                 }
2972                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2973                 if (key.type != BTRFS_CHUNK_ITEM_KEY)
2974                         goto next;
2975
2976                 ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
2977                 size = btrfs_item_size_nr(leaf, path.slots[0]);
2978                 BUG_ON(size != sizeof(chunk));
2979                 read_extent_buffer(leaf, &chunk, ptr, size);
2980                 type = btrfs_stack_chunk_type(&chunk);
2981
2982                 if (!(type & BTRFS_BLOCK_GROUP_SYSTEM))
2983                         goto next;
2984
2985                 ret = btrfs_add_system_chunk(trans, chunk_root, &key,
2986                                              &chunk, size);
2987                 if (ret)
2988                         goto err;
2989 next:
2990                 path.slots[0]++;
2991         }
2992
2993         ret = btrfs_commit_transaction(trans, root);
2994         BUG_ON(ret);
2995 err:
2996         btrfs_release_path(&path);
2997         return ret;
2998 }
2999
3000 static const struct btrfs_convert_operations ext2_convert_ops = {
3001         .name                   = "ext2",
3002         .open_fs                = ext2_open_fs,
3003         .read_used_space        = ext2_read_used_space,
3004         .alloc_block            = ext2_alloc_block,
3005         .alloc_block_range      = ext2_alloc_block_range,
3006         .copy_inodes            = ext2_copy_inodes,
3007         .test_block             = ext2_test_block,
3008         .free_block             = ext2_free_block,
3009         .free_block_range       = ext2_free_block_range,
3010         .close_fs               = ext2_close_fs,
3011 };
3012
3013 static const struct btrfs_convert_operations *convert_operations[] = {
3014         &ext2_convert_ops,
3015 };
3016
3017 static int convert_open_fs(const char *devname,
3018                            struct btrfs_convert_context *cctx)
3019 {
3020         int i;
3021
3022         memset(cctx, 0, sizeof(*cctx));
3023
3024         for (i = 0; i < ARRAY_SIZE(convert_operations); i++) {
3025                 int ret = convert_operations[i]->open_fs(cctx, devname);
3026
3027                 if (ret == 0) {
3028                         cctx->convert_ops = convert_operations[i];
3029                         return ret;
3030                 }
3031         }
3032
3033         fprintf(stderr, "No file system found to convert.\n");
3034         return -1;
3035 }
3036
3037 /*
3038  * Remove one reserve range from given cache tree
3039  * if min_stripe_size is non-zero, it will ensure for split case,
3040  * all its split cache extent is no smaller than @min_strip_size / 2.
3041  */
3042 static int wipe_one_reserved_range(struct cache_tree *tree,
3043                                    u64 start, u64 len, u64 min_stripe_size,
3044                                    int ensure_size)
3045 {
3046         struct cache_extent *cache;
3047         int ret;
3048
3049         BUG_ON(ensure_size && min_stripe_size == 0);
3050         /*
3051          * The logical here is simplified to handle special cases only
3052          * So we don't need to consider merge case for ensure_size
3053          */
3054         BUG_ON(min_stripe_size && (min_stripe_size < len * 2 ||
3055                min_stripe_size / 2 < BTRFS_STRIPE_LEN));
3056
3057         /* Also, wipe range should already be aligned */
3058         BUG_ON(start != round_down(start, BTRFS_STRIPE_LEN) ||
3059                start + len != round_up(start + len, BTRFS_STRIPE_LEN));
3060
3061         min_stripe_size /= 2;
3062
3063         cache = lookup_cache_extent(tree, start, len);
3064         if (!cache)
3065                 return 0;
3066
3067         if (start <= cache->start) {
3068                 /*
3069                  *      |--------cache---------|
3070                  * |-wipe-|
3071                  */
3072                 BUG_ON(start + len <= cache->start);
3073
3074                 /*
3075                  * The wipe size is smaller than min_stripe_size / 2,
3076                  * so the result length should still meet min_stripe_size
3077                  * And no need to do alignment
3078                  */
3079                 cache->size -= (start + len - cache->start);
3080                 if (cache->size == 0) {
3081                         remove_cache_extent(tree, cache);
3082                         free(cache);
3083                         return 0;
3084                 }
3085
3086                 BUG_ON(ensure_size && cache->size < min_stripe_size);
3087
3088                 cache->start = start + len;
3089                 return 0;
3090         } else if (start > cache->start && start + len < cache->start +
3091                    cache->size) {
3092                 /*
3093                  * |-------cache-----|
3094                  *      |-wipe-|
3095                  */
3096                 u64 old_len = cache->size;
3097                 u64 insert_start = start + len;
3098                 u64 insert_len;
3099
3100                 cache->size = start - cache->start;
3101                 if (ensure_size)
3102                         cache->size = max(cache->size, min_stripe_size);
3103                 cache->start = start - cache->size;
3104
3105                 /* And insert the new one */
3106                 insert_len = old_len - start - len;
3107                 if (ensure_size)
3108                         insert_len = max(insert_len, min_stripe_size);
3109
3110                 ret = add_merge_cache_extent(tree, insert_start, insert_len);
3111                 return ret;
3112         }
3113         /*
3114          * |----cache-----|
3115          *              |--wipe-|
3116          * Wipe len should be small enough and no need to expand the
3117          * remaining extent
3118          */
3119         cache->size = start - cache->start;
3120         BUG_ON(ensure_size && cache->size < min_stripe_size);
3121         return 0;
3122 }
3123
3124 /*
3125  * Remove reserved ranges from given cache_tree
3126  *
3127  * It will remove the following ranges
3128  * 1) 0~1M
3129  * 2) 2nd superblock, +64K (make sure chunks are 64K aligned)
3130  * 3) 3rd superblock, +64K
3131  *
3132  * @min_stripe must be given for safety check
3133  * and if @ensure_size is given, it will ensure affected cache_extent will be
3134  * larger than min_stripe_size
3135  */
3136 static int wipe_reserved_ranges(struct cache_tree *tree, u64 min_stripe_size,
3137                                 int ensure_size)
3138 {
3139         int ret;
3140
3141         ret = wipe_one_reserved_range(tree, 0, 1024 * 1024, min_stripe_size,
3142                                       ensure_size);
3143         if (ret < 0)
3144                 return ret;
3145         ret = wipe_one_reserved_range(tree, btrfs_sb_offset(1),
3146                         BTRFS_STRIPE_LEN, min_stripe_size, ensure_size);
3147         if (ret < 0)
3148                 return ret;
3149         ret = wipe_one_reserved_range(tree, btrfs_sb_offset(2),
3150                         BTRFS_STRIPE_LEN, min_stripe_size, ensure_size);
3151         return ret;
3152 }
3153
3154 static int calculate_available_space(struct btrfs_convert_context *cctx)
3155 {
3156         struct cache_tree *used = &cctx->used;
3157         struct cache_tree *data_chunks = &cctx->data_chunks;
3158         struct cache_tree *free = &cctx->free;
3159         struct cache_extent *cache;
3160         u64 cur_off = 0;
3161         /*
3162          * Twice the minimal chunk size, to allow later wipe_reserved_ranges()
3163          * works without need to consider overlap
3164          */
3165         u64 min_stripe_size = 2 * 16 * 1024 * 1024;
3166         int ret;
3167
3168         /* Calculate data_chunks */
3169         for (cache = first_cache_extent(used); cache;
3170              cache = next_cache_extent(cache)) {
3171                 u64 cur_len;
3172
3173                 if (cache->start + cache->size < cur_off)
3174                         continue;
3175                 if (cache->start > cur_off + min_stripe_size)
3176                         cur_off = cache->start;
3177                 cur_len = max(cache->start + cache->size - cur_off,
3178                               min_stripe_size);
3179                 ret = add_merge_cache_extent(data_chunks, cur_off, cur_len);
3180                 if (ret < 0)
3181                         goto out;
3182                 cur_off += cur_len;
3183         }
3184         /*
3185          * remove reserved ranges, so we won't ever bother relocating an old
3186          * filesystem extent to other place.
3187          */
3188         ret = wipe_reserved_ranges(data_chunks, min_stripe_size, 1);
3189         if (ret < 0)
3190                 goto out;
3191
3192         cur_off = 0;
3193         /*
3194          * Calculate free space
3195          * Always round up the start bytenr, to avoid metadata extent corss
3196          * stripe boundary, as later mkfs_convert() won't have all the extent
3197          * allocation check
3198          */
3199         for (cache = first_cache_extent(data_chunks); cache;
3200              cache = next_cache_extent(cache)) {
3201                 if (cache->start < cur_off)
3202                         continue;
3203                 if (cache->start > cur_off) {
3204                         u64 insert_start;
3205                         u64 len;
3206
3207                         len = cache->start - round_up(cur_off,
3208                                                       BTRFS_STRIPE_LEN);
3209                         insert_start = round_up(cur_off, BTRFS_STRIPE_LEN);
3210
3211                         ret = add_merge_cache_extent(free, insert_start, len);
3212                         if (ret < 0)
3213                                 goto out;
3214                 }
3215                 cur_off = cache->start + cache->size;
3216         }
3217         /* Don't forget the last range */
3218         if (cctx->total_bytes > cur_off) {
3219                 u64 len = cctx->total_bytes - cur_off;
3220                 u64 insert_start;
3221
3222                 insert_start = round_up(cur_off, BTRFS_STRIPE_LEN);
3223
3224                 ret = add_merge_cache_extent(free, insert_start, len);
3225                 if (ret < 0)
3226                         goto out;
3227         }
3228
3229         /* Remove reserved bytes */
3230         ret = wipe_reserved_ranges(free, min_stripe_size, 0);
3231 out:
3232         return ret;
3233 }
3234 /*
3235  * Read used space, and since we have the used space,
3236  * calcuate data_chunks and free for later mkfs
3237  */
3238 static int convert_read_used_space(struct btrfs_convert_context *cctx)
3239 {
3240         int ret;
3241
3242         ret = cctx->convert_ops->read_used_space(cctx);
3243         if (ret)
3244                 return ret;
3245
3246         ret = calculate_available_space(cctx);
3247         return ret;
3248 }
3249
3250 static int do_convert_v2(const char *devname, int datacsum, int packing,
3251                 int noxattr, u32 nodesize, int copylabel, const char *fslabel,
3252                 int progress, u64 features)
3253 {
3254         int ret;
3255         int fd = -1;
3256         int is_btrfs = 0;
3257         u32 blocksize;
3258         u64 total_bytes;
3259         struct btrfs_root *root;
3260         struct btrfs_root *image_root;
3261         struct btrfs_convert_context cctx;
3262         struct btrfs_key key;
3263         char *subvol_name = NULL;
3264         struct task_ctx ctx;
3265         char features_buf[64];
3266         struct btrfs_mkfs_config mkfs_cfg;
3267
3268         init_convert_context(&cctx);
3269         ret = convert_open_fs(devname, &cctx);
3270         if (ret)
3271                 goto fail;
3272         ret = convert_read_used_space(&cctx);
3273         if (ret)
3274                 goto fail;
3275
3276         blocksize = cctx.blocksize;
3277         total_bytes = (u64)blocksize * (u64)cctx.block_count;
3278         if (blocksize < 4096) {
3279                 fprintf(stderr, "block size is too small\n");
3280                 goto fail;
3281         }
3282         if (btrfs_check_nodesize(nodesize, blocksize, features))
3283                 goto fail;
3284         fd = open(devname, O_RDWR);
3285         if (fd < 0) {
3286                 fprintf(stderr, "unable to open %s\n", devname);
3287                 goto fail;
3288         }
3289         btrfs_parse_features_to_string(features_buf, features);
3290         if (features == BTRFS_MKFS_DEFAULT_FEATURES)
3291                 strcat(features_buf, " (default)");
3292
3293         printf("create btrfs filesystem:\n");
3294         printf("\tblocksize: %u\n", blocksize);
3295         printf("\tnodesize:  %u\n", nodesize);
3296         printf("\tfeatures:  %s\n", features_buf);
3297
3298         mkfs_cfg.label = cctx.volume_name;
3299         mkfs_cfg.num_bytes = total_bytes;
3300         mkfs_cfg.nodesize = nodesize;
3301         mkfs_cfg.sectorsize = blocksize;
3302         mkfs_cfg.stripesize = blocksize;
3303         mkfs_cfg.features = features;
3304         /* New convert need these space */
3305         mkfs_cfg.fs_uuid = malloc(BTRFS_UUID_UNPARSED_SIZE);
3306         mkfs_cfg.chunk_uuid = malloc(BTRFS_UUID_UNPARSED_SIZE);
3307         *(mkfs_cfg.fs_uuid) = '\0';
3308         *(mkfs_cfg.chunk_uuid) = '\0';
3309
3310         ret = make_btrfs(fd, &mkfs_cfg, &cctx);
3311         if (ret) {
3312                 fprintf(stderr, "unable to create initial ctree: %s\n",
3313                         strerror(-ret));
3314                 goto fail;
3315         }
3316
3317         root = open_ctree_fd(fd, devname, mkfs_cfg.super_bytenr,
3318                              OPEN_CTREE_WRITES);
3319         if (!root) {
3320                 fprintf(stderr, "unable to open ctree\n");
3321                 goto fail;
3322         }
3323         ret = init_btrfs_v2(&mkfs_cfg, root, &cctx, datacsum, packing, noxattr);
3324         if (ret) {
3325                 fprintf(stderr, "unable to setup the root tree\n");
3326                 goto fail;
3327         }
3328
3329         printf("creating %s image file.\n", cctx.convert_ops->name);
3330         ret = asprintf(&subvol_name, "%s_saved", cctx.convert_ops->name);
3331         if (ret < 0) {
3332                 fprintf(stderr, "error allocating subvolume name: %s_saved\n",
3333                         cctx.convert_ops->name);
3334                 goto fail;
3335         }
3336         key.objectid = CONV_IMAGE_SUBVOL_OBJECTID;
3337         key.offset = (u64)-1;
3338         key.type = BTRFS_ROOT_ITEM_KEY;
3339         image_root = btrfs_read_fs_root(root->fs_info, &key);
3340         if (!image_root) {
3341                 fprintf(stderr, "unable to create subvol\n");
3342                 goto fail;
3343         }
3344         ret = create_image_v2(image_root, &mkfs_cfg, &cctx, fd,
3345                               mkfs_cfg.num_bytes, "image", datacsum);
3346         if (ret) {
3347                 fprintf(stderr, "error during create_image %d\n", ret);
3348                 goto fail;
3349         }
3350
3351         printf("creating btrfs metadata.\n");
3352         ctx.max_copy_inodes = (cctx.inodes_count - cctx.free_inodes_count);
3353         ctx.cur_copy_inodes = 0;
3354
3355         if (progress) {
3356                 ctx.info = task_init(print_copied_inodes, after_copied_inodes,
3357                                      &ctx);
3358                 task_start(ctx.info);
3359         }
3360         ret = copy_inodes(&cctx, root, datacsum, packing, noxattr, &ctx);
3361         if (ret) {
3362                 fprintf(stderr, "error during copy_inodes %d\n", ret);
3363                 goto fail;
3364         }
3365         if (progress) {
3366                 task_stop(ctx.info);
3367                 task_deinit(ctx.info);
3368         }
3369
3370         image_root = link_subvol(root, subvol_name, CONV_IMAGE_SUBVOL_OBJECTID);
3371
3372         free(subvol_name);
3373
3374         memset(root->fs_info->super_copy->label, 0, BTRFS_LABEL_SIZE);
3375         if (copylabel == 1) {
3376                 __strncpy_null(root->fs_info->super_copy->label,
3377                                 cctx.volume_name, BTRFS_LABEL_SIZE - 1);
3378                 fprintf(stderr, "copy label '%s'\n",
3379                                 root->fs_info->super_copy->label);
3380         } else if (copylabel == -1) {
3381                 strcpy(root->fs_info->super_copy->label, fslabel);
3382                 fprintf(stderr, "set label to '%s'\n", fslabel);
3383         }
3384
3385         ret = close_ctree(root);
3386         if (ret) {
3387                 fprintf(stderr, "error during close_ctree %d\n", ret);
3388                 goto fail;
3389         }
3390         convert_close_fs(&cctx);
3391         clean_convert_context(&cctx);
3392
3393         /*
3394          * If this step succeed, we get a mountable btrfs. Otherwise
3395          * the source fs is left unchanged.
3396          */
3397         ret = migrate_super_block(fd, mkfs_cfg.super_bytenr, blocksize);
3398         if (ret) {
3399                 fprintf(stderr, "unable to migrate super block\n");
3400                 goto fail;
3401         }
3402         is_btrfs = 1;
3403
3404         root = open_ctree_fd(fd, devname, 0, OPEN_CTREE_WRITES);
3405         if (!root) {
3406                 fprintf(stderr, "unable to open ctree\n");
3407                 goto fail;
3408         }
3409         close(fd);
3410
3411         printf("conversion complete.\n");
3412         return 0;
3413 fail:
3414         clean_convert_context(&cctx);
3415         if (fd != -1)
3416                 close(fd);
3417         if (is_btrfs)
3418                 fprintf(stderr,
3419                         "WARNING: an error occurred during chunk mapping fixup, filesystem mountable but not finalized\n");
3420         else
3421                 fprintf(stderr, "conversion aborted\n");
3422         return -1;
3423 }
3424
3425 static int do_convert(const char *devname, int datacsum, int packing, int noxattr,
3426                 u32 nodesize, int copylabel, const char *fslabel, int progress,
3427                 u64 features)
3428 {
3429         int i, ret, blocks_per_node;
3430         int fd = -1;
3431         int is_btrfs = 0;
3432         u32 blocksize;
3433         u64 blocks[7];
3434         u64 total_bytes;
3435         u64 super_bytenr;
3436         struct btrfs_root *root;
3437         struct btrfs_root *image_root;
3438         struct btrfs_convert_context cctx;
3439         char *subvol_name = NULL;
3440         struct task_ctx ctx;
3441         char features_buf[64];
3442         struct btrfs_mkfs_config mkfs_cfg;
3443
3444         init_convert_context(&cctx);
3445         ret = convert_open_fs(devname, &cctx);
3446         if (ret)
3447                 goto fail;
3448         ret = convert_read_used_space(&cctx);
3449         if (ret)
3450                 goto fail;
3451
3452         blocksize = cctx.blocksize;
3453         total_bytes = (u64)blocksize * (u64)cctx.block_count;
3454         if (blocksize < 4096) {
3455                 fprintf(stderr, "block size is too small\n");
3456                 goto fail;
3457         }
3458         if (btrfs_check_nodesize(nodesize, blocksize, features))
3459                 goto fail;
3460         blocks_per_node = nodesize / blocksize;
3461         ret = -blocks_per_node;
3462         for (i = 0; i < 7; i++) {
3463                 if (nodesize == blocksize)
3464                         ret = convert_alloc_block(&cctx, 0, blocks + i);
3465                 else
3466                         ret = convert_alloc_block_range(&cctx,
3467                                         ret + blocks_per_node, blocks_per_node,
3468                                         blocks + i);
3469                 if (ret) {
3470                         fprintf(stderr, "not enough free space\n");
3471                         goto fail;
3472                 }
3473                 blocks[i] *= blocksize;
3474         }
3475         super_bytenr = blocks[0];
3476         fd = open(devname, O_RDWR);
3477         if (fd < 0) {
3478                 fprintf(stderr, "unable to open %s\n", devname);
3479                 goto fail;
3480         }
3481         btrfs_parse_features_to_string(features_buf, features);
3482         if (features == BTRFS_MKFS_DEFAULT_FEATURES)
3483                 strcat(features_buf, " (default)");
3484
3485         printf("create btrfs filesystem:\n");
3486         printf("\tblocksize: %u\n", blocksize);
3487         printf("\tnodesize:  %u\n", nodesize);
3488         printf("\tfeatures:  %s\n", features_buf);
3489
3490         mkfs_cfg.label = cctx.volume_name;
3491         mkfs_cfg.fs_uuid = NULL;
3492         memcpy(mkfs_cfg.blocks, blocks, sizeof(blocks));
3493         mkfs_cfg.num_bytes = total_bytes;
3494         mkfs_cfg.nodesize = nodesize;
3495         mkfs_cfg.sectorsize = blocksize;
3496         mkfs_cfg.stripesize = blocksize;
3497         mkfs_cfg.features = features;
3498
3499         ret = make_btrfs(fd, &mkfs_cfg, NULL);
3500         if (ret) {
3501                 fprintf(stderr, "unable to create initial ctree: %s\n",
3502                         strerror(-ret));
3503                 goto fail;
3504         }
3505         /* create a system chunk that maps the whole device */
3506         ret = prepare_system_chunk(fd, super_bytenr);
3507         if (ret) {
3508                 fprintf(stderr, "unable to update system chunk\n");
3509                 goto fail;
3510         }
3511         root = open_ctree_fd(fd, devname, super_bytenr, OPEN_CTREE_WRITES);
3512         if (!root) {
3513                 fprintf(stderr, "unable to open ctree\n");
3514                 goto fail;
3515         }
3516         ret = cache_free_extents(root, &cctx);
3517         if (ret) {
3518                 fprintf(stderr, "error during cache_free_extents %d\n", ret);
3519                 goto fail;
3520         }
3521         root->fs_info->extent_ops = &extent_ops;
3522         /* recover block allocation bitmap */
3523         for (i = 0; i < 7; i++) {
3524                 blocks[i] /= blocksize;
3525                 if (nodesize == blocksize)
3526                         convert_free_block(&cctx, blocks[i]);
3527                 else
3528                         convert_free_block_range(&cctx, blocks[i],
3529                                         blocks_per_node);
3530         }
3531         ret = init_btrfs(root);
3532         if (ret) {
3533                 fprintf(stderr, "unable to setup the root tree\n");
3534                 goto fail;
3535         }
3536         printf("creating btrfs metadata.\n");
3537         ctx.max_copy_inodes = (cctx.inodes_count - cctx.free_inodes_count);
3538         ctx.cur_copy_inodes = 0;
3539
3540         if (progress) {
3541                 ctx.info = task_init(print_copied_inodes, after_copied_inodes, &ctx);
3542                 task_start(ctx.info);
3543         }
3544         ret = copy_inodes(&cctx, root, datacsum, packing, noxattr, &ctx);
3545         if (ret) {
3546                 fprintf(stderr, "error during copy_inodes %d\n", ret);
3547                 goto fail;
3548         }
3549         if (progress) {
3550                 task_stop(ctx.info);
3551                 task_deinit(ctx.info);
3552         }
3553
3554         printf("creating %s image file.\n", cctx.convert_ops->name);
3555         ret = asprintf(&subvol_name, "%s_saved", cctx.convert_ops->name);
3556         if (ret < 0) {
3557                 fprintf(stderr, "error allocating subvolume name: %s_saved\n",
3558                         cctx.convert_ops->name);
3559                 goto fail;
3560         }
3561
3562         image_root = link_subvol(root, subvol_name, CONV_IMAGE_SUBVOL_OBJECTID);
3563
3564         free(subvol_name);
3565
3566         if (!image_root) {
3567                 fprintf(stderr, "unable to create subvol\n");
3568                 goto fail;
3569         }
3570         ret = create_image(&cctx, image_root, "image", datacsum);
3571         if (ret) {
3572                 fprintf(stderr, "error during create_image %d\n", ret);
3573                 goto fail;
3574         }
3575         memset(root->fs_info->super_copy->label, 0, BTRFS_LABEL_SIZE);
3576         if (copylabel == 1) {
3577                 __strncpy_null(root->fs_info->super_copy->label,
3578                                 cctx.volume_name, BTRFS_LABEL_SIZE - 1);
3579                 fprintf(stderr, "copy label '%s'\n",
3580                                 root->fs_info->super_copy->label);
3581         } else if (copylabel == -1) {
3582                 strcpy(root->fs_info->super_copy->label, fslabel);
3583                 fprintf(stderr, "set label to '%s'\n", fslabel);
3584         }
3585
3586         printf("cleaning up system chunk.\n");
3587         ret = cleanup_sys_chunk(root, image_root);
3588         if (ret) {
3589                 fprintf(stderr, "error during cleanup_sys_chunk %d\n", ret);
3590                 goto fail;
3591         }
3592         ret = close_ctree(root);
3593         if (ret) {
3594                 fprintf(stderr, "error during close_ctree %d\n", ret);
3595                 goto fail;
3596         }
3597         convert_close_fs(&cctx);
3598         clean_convert_context(&cctx);
3599
3600         /*
3601          * If this step succeed, we get a mountable btrfs. Otherwise
3602          * the source fs is left unchanged.
3603          */
3604         ret = migrate_super_block(fd, super_bytenr, blocksize);
3605         if (ret) {
3606                 fprintf(stderr, "unable to migrate super block\n");
3607                 goto fail;
3608         }
3609         is_btrfs = 1;
3610
3611         root = open_ctree_fd(fd, devname, 0, OPEN_CTREE_WRITES);
3612         if (!root) {
3613                 fprintf(stderr, "unable to open ctree\n");
3614                 goto fail;
3615         }
3616         /* move chunk tree into system chunk. */
3617         ret = fixup_chunk_mapping(root);
3618         if (ret) {
3619                 fprintf(stderr, "error during fixup_chunk_tree\n");
3620                 goto fail;
3621         }
3622         ret = close_ctree(root);
3623         close(fd);
3624
3625         printf("conversion complete.\n");
3626         return 0;
3627 fail:
3628         clean_convert_context(&cctx);
3629         if (fd != -1)
3630                 close(fd);
3631         if (is_btrfs)
3632                 fprintf(stderr,
3633                         "WARNING: an error occured during chunk mapping fixup, filesystem mountable but not finalized\n");
3634         else
3635                 fprintf(stderr, "conversion aborted\n");
3636         return -1;
3637 }
3638
3639 static int may_rollback(struct btrfs_root *root)
3640 {
3641         struct btrfs_fs_info *info = root->fs_info;
3642         struct btrfs_multi_bio *multi = NULL;
3643         u64 bytenr;
3644         u64 length;
3645         u64 physical;
3646         u64 total_bytes;
3647         int num_stripes;
3648         int ret;
3649
3650         if (btrfs_super_num_devices(info->super_copy) != 1)
3651                 goto fail;
3652
3653         bytenr = BTRFS_SUPER_INFO_OFFSET;
3654         total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
3655
3656         while (1) {
3657                 ret = btrfs_map_block(&info->mapping_tree, WRITE, bytenr,
3658                                       &length, &multi, 0, NULL);
3659                 if (ret) {
3660                         if (ret == -ENOENT) {
3661                                 /* removed block group at the tail */
3662                                 if (length == (u64)-1)
3663                                         break;
3664
3665                                 /* removed block group in the middle */
3666                                 goto next;
3667                         }
3668                         goto fail;
3669                 }
3670
3671                 num_stripes = multi->num_stripes;
3672                 physical = multi->stripes[0].physical;
3673                 kfree(multi);
3674
3675                 if (num_stripes != 1 || physical != bytenr)
3676                         goto fail;
3677 next:
3678                 bytenr += length;
3679                 if (bytenr >= total_bytes)
3680                         break;
3681         }
3682         return 0;
3683 fail:
3684         return -1;
3685 }
3686
3687 static int do_rollback(const char *devname)
3688 {
3689         int fd = -1;
3690         int ret;
3691         int i;
3692         struct btrfs_root *root;
3693         struct btrfs_root *image_root;
3694         struct btrfs_root *chunk_root;
3695         struct btrfs_dir_item *dir;
3696         struct btrfs_inode_item *inode;
3697         struct btrfs_file_extent_item *fi;
3698         struct btrfs_trans_handle *trans;
3699         struct extent_buffer *leaf;
3700         struct btrfs_block_group_cache *cache1;
3701         struct btrfs_block_group_cache *cache2;
3702         struct btrfs_key key;
3703         struct btrfs_path path;
3704         struct extent_io_tree io_tree;
3705         char *buf = NULL;
3706         char *name;
3707         u64 bytenr;
3708         u64 num_bytes;
3709         u64 root_dir;
3710         u64 objectid;
3711         u64 offset;
3712         u64 start;
3713         u64 end;
3714         u64 sb_bytenr;
3715         u64 first_free;
3716         u64 total_bytes;
3717         u32 sectorsize;
3718
3719         extent_io_tree_init(&io_tree);
3720
3721         fd = open(devname, O_RDWR);
3722         if (fd < 0) {
3723                 fprintf(stderr, "unable to open %s\n", devname);
3724                 goto fail;
3725         }
3726         root = open_ctree_fd(fd, devname, 0, OPEN_CTREE_WRITES);
3727         if (!root) {
3728                 fprintf(stderr, "unable to open ctree\n");
3729                 goto fail;
3730         }
3731         ret = may_rollback(root);
3732         if (ret < 0) {
3733                 fprintf(stderr, "unable to do rollback\n");
3734                 goto fail;
3735         }
3736
3737         sectorsize = root->sectorsize;
3738         buf = malloc(sectorsize);
3739         if (!buf) {
3740                 fprintf(stderr, "unable to allocate memory\n");
3741                 goto fail;
3742         }
3743
3744         btrfs_init_path(&path);
3745
3746         key.objectid = CONV_IMAGE_SUBVOL_OBJECTID;
3747         key.type = BTRFS_ROOT_BACKREF_KEY;
3748         key.offset = BTRFS_FS_TREE_OBJECTID;
3749         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path, 0,
3750                                 0);
3751         btrfs_release_path(&path);
3752         if (ret > 0) {
3753                 fprintf(stderr,
3754                 "ERROR: unable to convert ext2 image subvolume, is it deleted?\n");
3755                 goto fail;
3756         } else if (ret < 0) {
3757                 fprintf(stderr,
3758                         "ERROR: unable to open ext2_saved, id=%llu: %s\n",
3759                         (unsigned long long)key.objectid, strerror(-ret));
3760                 goto fail;
3761         }
3762
3763         key.objectid = CONV_IMAGE_SUBVOL_OBJECTID;
3764         key.type = BTRFS_ROOT_ITEM_KEY;
3765         key.offset = (u64)-1;
3766         image_root = btrfs_read_fs_root(root->fs_info, &key);
3767         if (!image_root || IS_ERR(image_root)) {
3768                 fprintf(stderr, "unable to open subvol %llu\n",
3769                         (unsigned long long)key.objectid);
3770                 goto fail;
3771         }
3772
3773         name = "image";
3774         root_dir = btrfs_root_dirid(&root->root_item);
3775         dir = btrfs_lookup_dir_item(NULL, image_root, &path,
3776                                    root_dir, name, strlen(name), 0);
3777         if (!dir || IS_ERR(dir)) {
3778                 fprintf(stderr, "unable to find file %s\n", name);
3779                 goto fail;
3780         }
3781         leaf = path.nodes[0];
3782         btrfs_dir_item_key_to_cpu(leaf, dir, &key);
3783         btrfs_release_path(&path);
3784
3785         objectid = key.objectid;
3786
3787         ret = btrfs_lookup_inode(NULL, image_root, &path, &key, 0);
3788         if (ret) {
3789                 fprintf(stderr, "unable to find inode item\n");
3790                 goto fail;
3791         }
3792         leaf = path.nodes[0];
3793         inode = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_inode_item);
3794         total_bytes = btrfs_inode_size(leaf, inode);
3795         btrfs_release_path(&path);
3796
3797         key.objectid = objectid;
3798         key.offset = 0;
3799         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
3800         ret = btrfs_search_slot(NULL, image_root, &key, &path, 0, 0);
3801         if (ret != 0) {
3802                 fprintf(stderr, "unable to find first file extent\n");
3803                 btrfs_release_path(&path);
3804                 goto fail;
3805         }
3806
3807         /* build mapping tree for the relocated blocks */
3808         for (offset = 0; offset < total_bytes; ) {
3809                 leaf = path.nodes[0];
3810                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3811                         ret = btrfs_next_leaf(root, &path);
3812                         if (ret != 0)
3813                                 break;  
3814                         continue;
3815                 }
3816
3817                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3818                 if (key.objectid != objectid || key.offset != offset ||
3819                     btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
3820                         break;
3821
3822                 fi = btrfs_item_ptr(leaf, path.slots[0],
3823                                     struct btrfs_file_extent_item);
3824                 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
3825                         break;
3826                 if (btrfs_file_extent_compression(leaf, fi) ||
3827                     btrfs_file_extent_encryption(leaf, fi) ||
3828                     btrfs_file_extent_other_encoding(leaf, fi))
3829                         break;
3830
3831                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
3832                 /* skip holes and direct mapped extents */
3833                 if (bytenr == 0 || bytenr == offset)
3834                         goto next_extent;
3835
3836                 bytenr += btrfs_file_extent_offset(leaf, fi);
3837                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
3838
3839                 cache1 = btrfs_lookup_block_group(root->fs_info, offset);
3840                 cache2 = btrfs_lookup_block_group(root->fs_info,
3841                                                   offset + num_bytes - 1);
3842                 /*
3843                  * Here we must take consideration of old and new convert
3844                  * behavior.
3845                  * For old convert case, sign, there is no consist chunk type
3846                  * that will cover the extent. META/DATA/SYS are all possible.
3847                  * Just ensure relocate one is in SYS chunk.
3848                  * For new convert case, they are all covered by DATA chunk.
3849                  *
3850                  * So, there is not valid chunk type check for it now.
3851                  */
3852                 if (cache1 != cache2)
3853                         break;
3854
3855                 set_extent_bits(&io_tree, offset, offset + num_bytes - 1,
3856                                 EXTENT_LOCKED, GFP_NOFS);
3857                 set_state_private(&io_tree, offset, bytenr);
3858 next_extent:
3859                 offset += btrfs_file_extent_num_bytes(leaf, fi);
3860                 path.slots[0]++;
3861         }
3862         btrfs_release_path(&path);
3863
3864         if (offset < total_bytes) {
3865                 fprintf(stderr, "unable to build extent mapping\n");
3866                 fprintf(stderr, "converted filesystem after balance is unable to rollback\n");
3867                 goto fail;
3868         }
3869
3870         first_free = BTRFS_SUPER_INFO_OFFSET + 2 * sectorsize - 1;
3871         first_free &= ~((u64)sectorsize - 1);
3872         /* backup for extent #0 should exist */
3873         if(!test_range_bit(&io_tree, 0, first_free - 1, EXTENT_LOCKED, 1)) {
3874                 fprintf(stderr, "no backup for the first extent\n");
3875                 goto fail;
3876         }
3877         /* force no allocation from system block group */
3878         root->fs_info->system_allocs = -1;
3879         trans = btrfs_start_transaction(root, 1);
3880         BUG_ON(!trans);
3881         /*
3882          * recow the whole chunk tree, this will remove all chunk tree blocks
3883          * from system block group
3884          */
3885         chunk_root = root->fs_info->chunk_root;
3886         memset(&key, 0, sizeof(key));
3887         while (1) {
3888                 ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 1);
3889                 if (ret < 0)
3890                         break;
3891
3892                 ret = btrfs_next_leaf(chunk_root, &path);
3893                 if (ret)
3894                         break;
3895
3896                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
3897                 btrfs_release_path(&path);
3898         }
3899         btrfs_release_path(&path);
3900
3901         offset = 0;
3902         num_bytes = 0;
3903         while(1) {
3904                 cache1 = btrfs_lookup_block_group(root->fs_info, offset);
3905                 if (!cache1)
3906                         break;
3907
3908                 if (cache1->flags & BTRFS_BLOCK_GROUP_SYSTEM)
3909                         num_bytes += btrfs_block_group_used(&cache1->item);
3910
3911                 offset = cache1->key.objectid + cache1->key.offset;
3912         }
3913         /* only extent #0 left in system block group? */
3914         if (num_bytes > first_free) {
3915                 fprintf(stderr, "unable to empty system block group\n");
3916                 goto fail;
3917         }
3918         /* create a system chunk that maps the whole device */
3919         ret = prepare_system_chunk_sb(root->fs_info->super_copy);
3920         if (ret) {
3921                 fprintf(stderr, "unable to update system chunk\n");
3922                 goto fail;
3923         }
3924
3925         ret = btrfs_commit_transaction(trans, root);
3926         BUG_ON(ret);
3927
3928         ret = close_ctree(root);
3929         if (ret) {
3930                 fprintf(stderr, "error during close_ctree %d\n", ret);
3931                 goto fail;
3932         }
3933
3934         /* zero btrfs super block mirrors */
3935         memset(buf, 0, sectorsize);
3936         for (i = 1 ; i < BTRFS_SUPER_MIRROR_MAX; i++) {
3937                 bytenr = btrfs_sb_offset(i);
3938                 if (bytenr >= total_bytes)
3939                         break;
3940                 ret = pwrite(fd, buf, sectorsize, bytenr);
3941                 if (ret != sectorsize) {
3942                         fprintf(stderr,
3943                                 "error during zeroing superblock %d: %d\n",
3944                                 i, ret);
3945                         goto fail;
3946                 }
3947         }
3948
3949         sb_bytenr = (u64)-1;
3950         /* copy all relocated blocks back */
3951         while(1) {
3952                 ret = find_first_extent_bit(&io_tree, 0, &start, &end,
3953                                             EXTENT_LOCKED);
3954                 if (ret)
3955                         break;
3956
3957                 ret = get_state_private(&io_tree, start, &bytenr);
3958                 BUG_ON(ret);
3959
3960                 clear_extent_bits(&io_tree, start, end, EXTENT_LOCKED,
3961                                   GFP_NOFS);
3962
3963                 while (start <= end) {
3964                         if (start == BTRFS_SUPER_INFO_OFFSET) {
3965                                 sb_bytenr = bytenr;
3966                                 goto next_sector;
3967                         }
3968                         ret = pread(fd, buf, sectorsize, bytenr);
3969                         if (ret < 0) {
3970                                 fprintf(stderr, "error during pread %d\n", ret);
3971                                 goto fail;
3972                         }
3973                         BUG_ON(ret != sectorsize);
3974                         ret = pwrite(fd, buf, sectorsize, start);
3975                         if (ret < 0) {
3976                                 fprintf(stderr, "error during pwrite %d\n", ret);
3977                                 goto fail;
3978                         }
3979                         BUG_ON(ret != sectorsize);
3980 next_sector:
3981                         start += sectorsize;
3982                         bytenr += sectorsize;
3983                 }
3984         }
3985
3986         ret = fsync(fd);
3987         if (ret) {
3988                 fprintf(stderr, "error during fsync %d\n", ret);
3989                 goto fail;
3990         }
3991         /*
3992          * finally, overwrite btrfs super block.
3993          */
3994         ret = pread(fd, buf, sectorsize, sb_bytenr);
3995         if (ret < 0) {
3996                 fprintf(stderr, "error during pread %d\n", ret);
3997                 goto fail;
3998         }
3999         BUG_ON(ret != sectorsize);
4000         ret = pwrite(fd, buf, sectorsize, BTRFS_SUPER_INFO_OFFSET);
4001         if (ret < 0) {
4002                 fprintf(stderr, "error during pwrite %d\n", ret);
4003                 goto fail;
4004         }
4005         BUG_ON(ret != sectorsize);
4006         ret = fsync(fd);
4007         if (ret) {
4008                 fprintf(stderr, "error during fsync %d\n", ret);
4009                 goto fail;
4010         }
4011
4012         close(fd);
4013         free(buf);
4014         extent_io_tree_cleanup(&io_tree);
4015         printf("rollback complete.\n");
4016         return 0;
4017
4018 fail:
4019         if (fd != -1)
4020                 close(fd);
4021         free(buf);
4022         fprintf(stderr, "rollback aborted.\n");
4023         return -1;
4024 }
4025
/*
 * Write the btrfs-convert synopsis followed by one line per supported
 * option to standard output.  Takes no arguments and returns nothing.
 */
static void print_usage(void)
{
	/* One entry per output line, emitted in order. */
	static const char * const usage_text[] = {
		"usage: btrfs-convert [options] device\n",
		"options:\n",
		"\t-d|--no-datasum        disable data checksum, sets NODATASUM\n",
		"\t-i|--no-xattr          ignore xattrs and ACLs\n",
		"\t-n|--no-inline         disable inlining of small files to metadata\n",
		"\t-N|--nodesize SIZE     set filesystem metadata nodesize\n",
		"\t-r|--rollback          roll back to the original filesystem\n",
		"\t-l|--label LABEL       set filesystem label\n",
		"\t-L|--copy-label        use label from converted filesystem\n",
		"\t-p|--progress          show converting progress (default)\n",
		"\t-O|--features LIST     comma separated list of filesystem features\n",
		"\t--no-progress          show only overview, not the detailed progress\n",
	};
	const size_t nr_lines = sizeof(usage_text) / sizeof(usage_text[0]);
	size_t idx;

	for (idx = 0; idx < nr_lines; idx++)
		fputs(usage_text[idx], stdout);
}
4041
4042 int main(int argc, char *argv[])
4043 {
4044         int ret;
4045         int packing = 1;
4046         int noxattr = 0;
4047         int datacsum = 1;
4048         u32 nodesize = max_t(u32, sysconf(_SC_PAGESIZE),
4049                         BTRFS_MKFS_DEFAULT_NODE_SIZE);
4050         int rollback = 0;
4051         int copylabel = 0;
4052         int usage_error = 0;
4053         int progress = 1;
4054         char *file;
4055         char fslabel[BTRFS_LABEL_SIZE];
4056         u64 features = BTRFS_MKFS_DEFAULT_FEATURES;
4057
4058         while(1) {
4059                 enum { GETOPT_VAL_NO_PROGRESS = 256 };
4060                 static const struct option long_options[] = {
4061                         { "no-progress", no_argument, NULL,
4062                                 GETOPT_VAL_NO_PROGRESS },
4063                         { "no-datasum", no_argument, NULL, 'd' },
4064                         { "no-inline", no_argument, NULL, 'n' },
4065                         { "no-xattr", no_argument, NULL, 'i' },
4066                         { "rollback", no_argument, NULL, 'r' },
4067                         { "features", required_argument, NULL, 'O' },
4068                         { "progress", no_argument, NULL, 'p' },
4069                         { "label", required_argument, NULL, 'l' },
4070                         { "copy-label", no_argument, NULL, 'L' },
4071                         { "nodesize", required_argument, NULL, 'N' },
4072                         { "help", no_argument, NULL, GETOPT_VAL_HELP},
4073                         { NULL, 0, NULL, 0 }
4074                 };
4075                 int c = getopt_long(argc, argv, "dinN:rl:LpO:", long_options, NULL);
4076
4077                 if (c < 0)
4078                         break;
4079                 switch(c) {
4080                         case 'd':
4081                                 datacsum = 0;
4082                                 break;
4083                         case 'i':
4084                                 noxattr = 1;
4085                                 break;
4086                         case 'n':
4087                                 packing = 0;
4088                                 break;
4089                         case 'N':
4090                                 nodesize = parse_size(optarg);
4091                                 break;
4092                         case 'r':
4093                                 rollback = 1;
4094                                 break;
4095                         case 'l':
4096                                 copylabel = -1;
4097                                 if (strlen(optarg) >= BTRFS_LABEL_SIZE) {
4098                                         fprintf(stderr,
4099                                 "WARNING: label too long, trimmed to %d bytes\n",
4100                                                 BTRFS_LABEL_SIZE - 1);
4101                                 }
4102                                 __strncpy_null(fslabel, optarg, BTRFS_LABEL_SIZE - 1);
4103                                 break;
4104                         case 'L':
4105                                 copylabel = 1;
4106                                 break;
4107                         case 'p':
4108                                 progress = 1;
4109                                 break;
4110                         case 'O': {
4111                                 char *orig = strdup(optarg);
4112                                 char *tmp = orig;
4113
4114                                 tmp = btrfs_parse_fs_features(tmp, &features);
4115                                 if (tmp) {
4116                                         fprintf(stderr,
4117                                                 "Unrecognized filesystem feature '%s'\n",
4118                                                         tmp);
4119                                         free(orig);
4120                                         exit(1);
4121                                 }
4122                                 free(orig);
4123                                 if (features & BTRFS_FEATURE_LIST_ALL) {
4124                                         btrfs_list_all_fs_features(
4125                                                 ~BTRFS_CONVERT_ALLOWED_FEATURES);
4126                                         exit(0);
4127                                 }
4128                                 if (features & ~BTRFS_CONVERT_ALLOWED_FEATURES) {
4129                                         char buf[64];
4130
4131                                         btrfs_parse_features_to_string(buf,
4132                                                 features & ~BTRFS_CONVERT_ALLOWED_FEATURES);
4133                                         fprintf(stderr,
4134                                                 "ERROR: features not allowed for convert: %s\n",
4135                                                 buf);
4136                                         exit(1);
4137                                 }
4138
4139                                 break;
4140                                 }
4141                         case GETOPT_VAL_NO_PROGRESS:
4142                                 progress = 0;
4143                                 break;
4144                         case GETOPT_VAL_HELP:
4145                         default:
4146                                 print_usage();
4147                                 return c != GETOPT_VAL_HELP;
4148                 }
4149         }
4150         set_argv0(argv);
4151         if (check_argc_exact(argc - optind, 1)) {
4152                 print_usage();
4153                 return 1;
4154         }
4155
4156         if (rollback && (!datacsum || noxattr || !packing)) {
4157                 fprintf(stderr,
4158                         "Usage error: -d, -i, -n options do not apply to rollback\n");
4159                 usage_error++;
4160         }
4161
4162         if (usage_error) {
4163                 print_usage();
4164                 return 1;
4165         }
4166
4167         file = argv[optind];
4168         ret = check_mounted(file);
4169         if (ret < 0) {
4170                 fprintf(stderr, "Could not check mount status: %s\n",
4171                         strerror(-ret));
4172                 return 1;
4173         } else if (ret) {
4174                 fprintf(stderr, "%s is mounted\n", file);
4175                 return 1;
4176         }
4177
4178         if (rollback) {
4179                 ret = do_rollback(file);
4180         } else {
4181                 ret = do_convert_v2(file, datacsum, packing, noxattr, nodesize,
4182                                 copylabel, fslabel, progress, features);
4183         }
4184         if (ret)
4185                 return 1;
4186         return 0;
4187 }