btrfs-progs: mkfs: make list of source fs more visible
[platform/upstream/btrfs-progs.git] / convert / main.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
#include "kerncompat.h"

#include <sys/ioctl.h>
#include <sys/mount.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <uuid/uuid.h>
#include <linux/limits.h>
#include <getopt.h>

#include "ctree.h"
#include "disk-io.h"
#include "volumes.h"
#include "transaction.h"
#include "crc32c.h"
#include "utils.h"
#include "task-utils.h"
#include "help.h"
#include "mkfs/common.h"
#include "convert/common.h"
#include "convert/source-fs.h"
#include "fsfeatures.h"
45
/*
 * Conversion back ends compiled into this build.
 *
 * ext2_convert_ops is only referenced from this file, so it is declared
 * "extern".  Without the keyword the line is a tentative definition, which
 * creates a second, zero-filled object that can clash with the initialized
 * definition at link time (e.g. when building with -fno-common).
 * NOTE(review): the initialized definition is assumed to live in the ext2
 * source back end - confirm.
 */
extern const struct btrfs_convert_operations ext2_convert_ops;

static const struct btrfs_convert_operations *convert_operations[] = {
#if BTRFSCONVERT_EXT2
        &ext2_convert_ops,
#endif
};
53
54 static void *print_copied_inodes(void *p)
55 {
56         struct task_ctx *priv = p;
57         const char work_indicator[] = { '.', 'o', 'O', 'o' };
58         uint32_t count = 0;
59
60         task_period_start(priv->info, 1000 /* 1s */);
61         while (1) {
62                 count++;
63                 printf("copy inodes [%c] [%10d/%10d]\r",
64                        work_indicator[count % 4], priv->cur_copy_inodes,
65                        priv->max_copy_inodes);
66                 fflush(stdout);
67                 task_period_wait(priv->info);
68         }
69
70         return NULL;
71 }
72
/*
 * Progress task epilogue: terminate the status line that was being rewritten
 * in place.  @p is unused.  Always returns 0.
 */
static int after_copied_inodes(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
80
81 static inline int copy_inodes(struct btrfs_convert_context *cctx,
82                               struct btrfs_root *root, int datacsum,
83                               int packing, int noxattr, struct task_ctx *p)
84 {
85         return cctx->convert_ops->copy_inodes(cctx, root, datacsum, packing,
86                                              noxattr, p);
87 }
88
89 static inline void convert_close_fs(struct btrfs_convert_context *cctx)
90 {
91         cctx->convert_ops->close_fs(cctx);
92 }
93
94 static inline int convert_check_state(struct btrfs_convert_context *cctx)
95 {
96         return cctx->convert_ops->check_state(cctx);
97 }
98
99 static int csum_disk_extent(struct btrfs_trans_handle *trans,
100                             struct btrfs_root *root,
101                             u64 disk_bytenr, u64 num_bytes)
102 {
103         u32 blocksize = root->sectorsize;
104         u64 offset;
105         char *buffer;
106         int ret = 0;
107
108         buffer = malloc(blocksize);
109         if (!buffer)
110                 return -ENOMEM;
111         for (offset = 0; offset < num_bytes; offset += blocksize) {
112                 ret = read_disk_extent(root, disk_bytenr + offset,
113                                         blocksize, buffer);
114                 if (ret)
115                         break;
116                 ret = btrfs_csum_file_block(trans,
117                                             root->fs_info->csum_root,
118                                             disk_bytenr + num_bytes,
119                                             disk_bytenr + offset,
120                                             buffer, blocksize);
121                 if (ret)
122                         break;
123         }
124         free(buffer);
125         return ret;
126 }
127
128 static int create_image_file_range(struct btrfs_trans_handle *trans,
129                                       struct btrfs_root *root,
130                                       struct cache_tree *used,
131                                       struct btrfs_inode_item *inode,
132                                       u64 ino, u64 bytenr, u64 *ret_len,
133                                       int datacsum)
134 {
135         struct cache_extent *cache;
136         struct btrfs_block_group_cache *bg_cache;
137         u64 len = *ret_len;
138         u64 disk_bytenr;
139         int i;
140         int ret;
141
142         if (bytenr != round_down(bytenr, root->sectorsize)) {
143                 error("bytenr not sectorsize aligned: %llu",
144                                 (unsigned long long)bytenr);
145                 return -EINVAL;
146         }
147         if (len != round_down(len, root->sectorsize)) {
148                 error("length not sectorsize aligned: %llu",
149                                 (unsigned long long)len);
150                 return -EINVAL;
151         }
152         len = min_t(u64, len, BTRFS_MAX_EXTENT_SIZE);
153
154         /*
155          * Skip sb ranges first
156          * [0, 1M), [sb_offset(1), +64K), [sb_offset(2), +64K].
157          *
158          * Or we will insert a hole into current image file, and later
159          * migrate block will fail as there is already a file extent.
160          */
161         if (bytenr < 1024 * 1024) {
162                 *ret_len = 1024 * 1024 - bytenr;
163                 return 0;
164         }
165         for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
166                 u64 cur = btrfs_sb_offset(i);
167
168                 if (bytenr >= cur && bytenr < cur + BTRFS_STRIPE_LEN) {
169                         *ret_len = cur + BTRFS_STRIPE_LEN - bytenr;
170                         return 0;
171                 }
172         }
173         for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
174                 u64 cur = btrfs_sb_offset(i);
175
176                 /*
177                  *      |--reserved--|
178                  * |----range-------|
179                  * May still need to go through file extent inserts
180                  */
181                 if (bytenr < cur && bytenr + len >= cur) {
182                         len = min_t(u64, len, cur - bytenr);
183                         break;
184                 }
185                 /*
186                  * |--reserved--|
187                  *      |---range---|
188                  * Drop out, no need to insert anything
189                  */
190                 if (bytenr >= cur && bytenr < cur + BTRFS_STRIPE_LEN) {
191                         *ret_len = cur + BTRFS_STRIPE_LEN - bytenr;
192                         return 0;
193                 }
194         }
195
196         cache = search_cache_extent(used, bytenr);
197         if (cache) {
198                 if (cache->start <= bytenr) {
199                         /*
200                          * |///////Used///////|
201                          *      |<--insert--->|
202                          *      bytenr
203                          */
204                         len = min_t(u64, len, cache->start + cache->size -
205                                     bytenr);
206                         disk_bytenr = bytenr;
207                 } else {
208                         /*
209                          *              |//Used//|
210                          *  |<-insert-->|
211                          *  bytenr
212                          */
213                         len = min(len, cache->start - bytenr);
214                         disk_bytenr = 0;
215                         datacsum = 0;
216                 }
217         } else {
218                 /*
219                  * |//Used//|           |EOF
220                  *          |<-insert-->|
221                  *          bytenr
222                  */
223                 disk_bytenr = 0;
224                 datacsum = 0;
225         }
226
227         if (disk_bytenr) {
228                 /* Check if the range is in a data block group */
229                 bg_cache = btrfs_lookup_block_group(root->fs_info, bytenr);
230                 if (!bg_cache)
231                         return -ENOENT;
232                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
233                         return -EINVAL;
234
235                 /* The extent should never cross block group boundary */
236                 len = min_t(u64, len, bg_cache->key.objectid +
237                             bg_cache->key.offset - bytenr);
238         }
239
240         if (len != round_down(len, root->sectorsize)) {
241                 error("remaining length not sectorsize aligned: %llu",
242                                 (unsigned long long)len);
243                 return -EINVAL;
244         }
245         ret = btrfs_record_file_extent(trans, root, ino, inode, bytenr,
246                                        disk_bytenr, len);
247         if (ret < 0)
248                 return ret;
249
250         if (datacsum)
251                 ret = csum_disk_extent(trans, root, bytenr, len);
252         *ret_len = len;
253         return ret;
254 }
255
256 /*
257  * Relocate old fs data in one reserved ranges
258  *
259  * Since all old fs data in reserved range is not covered by any chunk nor
260  * data extent, we don't need to handle any reference but add new
261  * extent/reference, which makes codes more clear
262  */
263 static int migrate_one_reserved_range(struct btrfs_trans_handle *trans,
264                                       struct btrfs_root *root,
265                                       struct cache_tree *used,
266                                       struct btrfs_inode_item *inode, int fd,
267                                       u64 ino, u64 start, u64 len, int datacsum)
268 {
269         u64 cur_off = start;
270         u64 cur_len = len;
271         u64 hole_start = start;
272         u64 hole_len;
273         struct cache_extent *cache;
274         struct btrfs_key key;
275         struct extent_buffer *eb;
276         int ret = 0;
277
278         while (cur_off < start + len) {
279                 cache = lookup_cache_extent(used, cur_off, cur_len);
280                 if (!cache)
281                         break;
282                 cur_off = max(cache->start, cur_off);
283                 cur_len = min(cache->start + cache->size, start + len) -
284                           cur_off;
285                 BUG_ON(cur_len < root->sectorsize);
286
287                 /* reserve extent for the data */
288                 ret = btrfs_reserve_extent(trans, root, cur_len, 0, 0, (u64)-1,
289                                            &key, 1);
290                 if (ret < 0)
291                         break;
292
293                 eb = malloc(sizeof(*eb) + cur_len);
294                 if (!eb) {
295                         ret = -ENOMEM;
296                         break;
297                 }
298
299                 ret = pread(fd, eb->data, cur_len, cur_off);
300                 if (ret < cur_len) {
301                         ret = (ret < 0 ? ret : -EIO);
302                         free(eb);
303                         break;
304                 }
305                 eb->start = key.objectid;
306                 eb->len = key.offset;
307
308                 /* Write the data */
309                 ret = write_and_map_eb(trans, root, eb);
310                 free(eb);
311                 if (ret < 0)
312                         break;
313
314                 /* Now handle extent item and file extent things */
315                 ret = btrfs_record_file_extent(trans, root, ino, inode, cur_off,
316                                                key.objectid, key.offset);
317                 if (ret < 0)
318                         break;
319                 /* Finally, insert csum items */
320                 if (datacsum)
321                         ret = csum_disk_extent(trans, root, key.objectid,
322                                                key.offset);
323
324                 /* Don't forget to insert hole */
325                 hole_len = cur_off - hole_start;
326                 if (hole_len) {
327                         ret = btrfs_record_file_extent(trans, root, ino, inode,
328                                         hole_start, 0, hole_len);
329                         if (ret < 0)
330                                 break;
331                 }
332
333                 cur_off += key.offset;
334                 hole_start = cur_off;
335                 cur_len = start + len - cur_off;
336         }
337         /* Last hole */
338         if (start + len - hole_start > 0)
339                 ret = btrfs_record_file_extent(trans, root, ino, inode,
340                                 hole_start, 0, start + len - hole_start);
341         return ret;
342 }
343
344 /*
345  * Relocate the used ext2 data in reserved ranges
346  * [0,1M)
347  * [btrfs_sb_offset(1), +BTRFS_STRIPE_LEN)
348  * [btrfs_sb_offset(2), +BTRFS_STRIPE_LEN)
349  */
350 static int migrate_reserved_ranges(struct btrfs_trans_handle *trans,
351                                    struct btrfs_root *root,
352                                    struct cache_tree *used,
353                                    struct btrfs_inode_item *inode, int fd,
354                                    u64 ino, u64 total_bytes, int datacsum)
355 {
356         u64 cur_off;
357         u64 cur_len;
358         int ret = 0;
359
360         /* 0 ~ 1M */
361         cur_off = 0;
362         cur_len = 1024 * 1024;
363         ret = migrate_one_reserved_range(trans, root, used, inode, fd, ino,
364                                          cur_off, cur_len, datacsum);
365         if (ret < 0)
366                 return ret;
367
368         /* second sb(fisrt sb is included in 0~1M) */
369         cur_off = btrfs_sb_offset(1);
370         cur_len = min(total_bytes, cur_off + BTRFS_STRIPE_LEN) - cur_off;
371         if (cur_off > total_bytes)
372                 return ret;
373         ret = migrate_one_reserved_range(trans, root, used, inode, fd, ino,
374                                          cur_off, cur_len, datacsum);
375         if (ret < 0)
376                 return ret;
377
378         /* Last sb */
379         cur_off = btrfs_sb_offset(2);
380         cur_len = min(total_bytes, cur_off + BTRFS_STRIPE_LEN) - cur_off;
381         if (cur_off > total_bytes)
382                 return ret;
383         ret = migrate_one_reserved_range(trans, root, used, inode, fd, ino,
384                                          cur_off, cur_len, datacsum);
385         return ret;
386 }
387
388 /*
389  * Helper for expand and merge extent_cache for wipe_one_reserved_range() to
390  * handle wiping a range that exists in cache.
391  */
392 static int _expand_extent_cache(struct cache_tree *tree,
393                                 struct cache_extent *entry,
394                                 u64 min_stripe_size, int backward)
395 {
396         struct cache_extent *ce;
397         int diff;
398
399         if (entry->size >= min_stripe_size)
400                 return 0;
401         diff = min_stripe_size - entry->size;
402
403         if (backward) {
404                 ce = prev_cache_extent(entry);
405                 if (!ce)
406                         goto expand_back;
407                 if (ce->start + ce->size >= entry->start - diff) {
408                         /* Directly merge with previous extent */
409                         ce->size = entry->start + entry->size - ce->start;
410                         remove_cache_extent(tree, entry);
411                         free(entry);
412                         return 0;
413                 }
414 expand_back:
415                 /* No overlap, normal extent */
416                 if (entry->start < diff) {
417                         error("cannot find space for data chunk layout");
418                         return -ENOSPC;
419                 }
420                 entry->start -= diff;
421                 entry->size += diff;
422                 return 0;
423         }
424         ce = next_cache_extent(entry);
425         if (!ce)
426                 goto expand_after;
427         if (entry->start + entry->size + diff >= ce->start) {
428                 /* Directly merge with next extent */
429                 entry->size = ce->start + ce->size - entry->start;
430                 remove_cache_extent(tree, ce);
431                 free(ce);
432                 return 0;
433         }
434 expand_after:
435         entry->size += diff;
436         return 0;
437 }
438
439 /*
440  * Remove one reserve range from given cache tree
441  * if min_stripe_size is non-zero, it will ensure for split case,
442  * all its split cache extent is no smaller than @min_strip_size / 2.
443  */
444 static int wipe_one_reserved_range(struct cache_tree *tree,
445                                    u64 start, u64 len, u64 min_stripe_size,
446                                    int ensure_size)
447 {
448         struct cache_extent *cache;
449         int ret;
450
451         BUG_ON(ensure_size && min_stripe_size == 0);
452         /*
453          * The logical here is simplified to handle special cases only
454          * So we don't need to consider merge case for ensure_size
455          */
456         BUG_ON(min_stripe_size && (min_stripe_size < len * 2 ||
457                min_stripe_size / 2 < BTRFS_STRIPE_LEN));
458
459         /* Also, wipe range should already be aligned */
460         BUG_ON(start != round_down(start, BTRFS_STRIPE_LEN) ||
461                start + len != round_up(start + len, BTRFS_STRIPE_LEN));
462
463         min_stripe_size /= 2;
464
465         cache = lookup_cache_extent(tree, start, len);
466         if (!cache)
467                 return 0;
468
469         if (start <= cache->start) {
470                 /*
471                  *      |--------cache---------|
472                  * |-wipe-|
473                  */
474                 BUG_ON(start + len <= cache->start);
475
476                 /*
477                  * The wipe size is smaller than min_stripe_size / 2,
478                  * so the result length should still meet min_stripe_size
479                  * And no need to do alignment
480                  */
481                 cache->size -= (start + len - cache->start);
482                 if (cache->size == 0) {
483                         remove_cache_extent(tree, cache);
484                         free(cache);
485                         return 0;
486                 }
487
488                 BUG_ON(ensure_size && cache->size < min_stripe_size);
489
490                 cache->start = start + len;
491                 return 0;
492         } else if (start > cache->start && start + len < cache->start +
493                    cache->size) {
494                 /*
495                  * |-------cache-----|
496                  *      |-wipe-|
497                  */
498                 u64 old_start = cache->start;
499                 u64 old_len = cache->size;
500                 u64 insert_start = start + len;
501                 u64 insert_len;
502
503                 cache->size = start - cache->start;
504                 /* Expand the leading half part if needed */
505                 if (ensure_size && cache->size < min_stripe_size) {
506                         ret = _expand_extent_cache(tree, cache,
507                                         min_stripe_size, 1);
508                         if (ret < 0)
509                                 return ret;
510                 }
511
512                 /* And insert the new one */
513                 insert_len = old_start + old_len - start - len;
514                 ret = add_merge_cache_extent(tree, insert_start, insert_len);
515                 if (ret < 0)
516                         return ret;
517
518                 /* Expand the last half part if needed */
519                 if (ensure_size && insert_len < min_stripe_size) {
520                         cache = lookup_cache_extent(tree, insert_start,
521                                                     insert_len);
522                         if (!cache || cache->start != insert_start ||
523                             cache->size != insert_len)
524                                 return -ENOENT;
525                         ret = _expand_extent_cache(tree, cache,
526                                         min_stripe_size, 0);
527                 }
528
529                 return ret;
530         }
531         /*
532          * |----cache-----|
533          *              |--wipe-|
534          * Wipe len should be small enough and no need to expand the
535          * remaining extent
536          */
537         cache->size = start - cache->start;
538         BUG_ON(ensure_size && cache->size < min_stripe_size);
539         return 0;
540 }
541
542 /*
543  * Remove reserved ranges from given cache_tree
544  *
545  * It will remove the following ranges
546  * 1) 0~1M
547  * 2) 2nd superblock, +64K (make sure chunks are 64K aligned)
548  * 3) 3rd superblock, +64K
549  *
550  * @min_stripe must be given for safety check
551  * and if @ensure_size is given, it will ensure affected cache_extent will be
552  * larger than min_stripe_size
553  */
554 static int wipe_reserved_ranges(struct cache_tree *tree, u64 min_stripe_size,
555                                 int ensure_size)
556 {
557         int ret;
558
559         ret = wipe_one_reserved_range(tree, 0, 1024 * 1024, min_stripe_size,
560                                       ensure_size);
561         if (ret < 0)
562                 return ret;
563         ret = wipe_one_reserved_range(tree, btrfs_sb_offset(1),
564                         BTRFS_STRIPE_LEN, min_stripe_size, ensure_size);
565         if (ret < 0)
566                 return ret;
567         ret = wipe_one_reserved_range(tree, btrfs_sb_offset(2),
568                         BTRFS_STRIPE_LEN, min_stripe_size, ensure_size);
569         return ret;
570 }
571
572 static int calculate_available_space(struct btrfs_convert_context *cctx)
573 {
574         struct cache_tree *used = &cctx->used;
575         struct cache_tree *data_chunks = &cctx->data_chunks;
576         struct cache_tree *free = &cctx->free;
577         struct cache_extent *cache;
578         u64 cur_off = 0;
579         /*
580          * Twice the minimal chunk size, to allow later wipe_reserved_ranges()
581          * works without need to consider overlap
582          */
583         u64 min_stripe_size = 2 * 16 * 1024 * 1024;
584         int ret;
585
586         /* Calculate data_chunks */
587         for (cache = first_cache_extent(used); cache;
588              cache = next_cache_extent(cache)) {
589                 u64 cur_len;
590
591                 if (cache->start + cache->size < cur_off)
592                         continue;
593                 if (cache->start > cur_off + min_stripe_size)
594                         cur_off = cache->start;
595                 cur_len = max(cache->start + cache->size - cur_off,
596                               min_stripe_size);
597                 ret = add_merge_cache_extent(data_chunks, cur_off, cur_len);
598                 if (ret < 0)
599                         goto out;
600                 cur_off += cur_len;
601         }
602         /*
603          * remove reserved ranges, so we won't ever bother relocating an old
604          * filesystem extent to other place.
605          */
606         ret = wipe_reserved_ranges(data_chunks, min_stripe_size, 1);
607         if (ret < 0)
608                 goto out;
609
610         cur_off = 0;
611         /*
612          * Calculate free space
613          * Always round up the start bytenr, to avoid metadata extent corss
614          * stripe boundary, as later mkfs_convert() won't have all the extent
615          * allocation check
616          */
617         for (cache = first_cache_extent(data_chunks); cache;
618              cache = next_cache_extent(cache)) {
619                 if (cache->start < cur_off)
620                         continue;
621                 if (cache->start > cur_off) {
622                         u64 insert_start;
623                         u64 len;
624
625                         len = cache->start - round_up(cur_off,
626                                                       BTRFS_STRIPE_LEN);
627                         insert_start = round_up(cur_off, BTRFS_STRIPE_LEN);
628
629                         ret = add_merge_cache_extent(free, insert_start, len);
630                         if (ret < 0)
631                                 goto out;
632                 }
633                 cur_off = cache->start + cache->size;
634         }
635         /* Don't forget the last range */
636         if (cctx->total_bytes > cur_off) {
637                 u64 len = cctx->total_bytes - cur_off;
638                 u64 insert_start;
639
640                 insert_start = round_up(cur_off, BTRFS_STRIPE_LEN);
641
642                 ret = add_merge_cache_extent(free, insert_start, len);
643                 if (ret < 0)
644                         goto out;
645         }
646
647         /* Remove reserved bytes */
648         ret = wipe_reserved_ranges(free, min_stripe_size, 0);
649 out:
650         return ret;
651 }
652
653 /*
654  * Read used space, and since we have the used space,
655  * calcuate data_chunks and free for later mkfs
656  */
657 static int convert_read_used_space(struct btrfs_convert_context *cctx)
658 {
659         int ret;
660
661         ret = cctx->convert_ops->read_used_space(cctx);
662         if (ret)
663                 return ret;
664
665         ret = calculate_available_space(cctx);
666         return ret;
667 }
668
669 /*
670  * Create the fs image file of old filesystem.
671  *
672  * This is completely fs independent as we have cctx->used, only
673  * need to create file extents pointing to all the positions.
674  */
675 static int create_image(struct btrfs_root *root,
676                            struct btrfs_mkfs_config *cfg,
677                            struct btrfs_convert_context *cctx, int fd,
678                            u64 size, char *name, int datacsum)
679 {
680         struct btrfs_inode_item buf;
681         struct btrfs_trans_handle *trans;
682         struct btrfs_path path;
683         struct btrfs_key key;
684         struct cache_extent *cache;
685         struct cache_tree used_tmp;
686         u64 cur;
687         u64 ino;
688         u64 flags = BTRFS_INODE_READONLY;
689         int ret;
690
691         if (!datacsum)
692                 flags |= BTRFS_INODE_NODATASUM;
693
694         trans = btrfs_start_transaction(root, 1);
695         if (!trans)
696                 return -ENOMEM;
697
698         cache_tree_init(&used_tmp);
699         btrfs_init_path(&path);
700
701         ret = btrfs_find_free_objectid(trans, root, BTRFS_FIRST_FREE_OBJECTID,
702                                        &ino);
703         if (ret < 0)
704                 goto out;
705         ret = btrfs_new_inode(trans, root, ino, 0400 | S_IFREG);
706         if (ret < 0)
707                 goto out;
708         ret = btrfs_change_inode_flags(trans, root, ino, flags);
709         if (ret < 0)
710                 goto out;
711         ret = btrfs_add_link(trans, root, ino, BTRFS_FIRST_FREE_OBJECTID, name,
712                              strlen(name), BTRFS_FT_REG_FILE, NULL, 1);
713         if (ret < 0)
714                 goto out;
715
716         key.objectid = ino;
717         key.type = BTRFS_INODE_ITEM_KEY;
718         key.offset = 0;
719
720         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
721         if (ret) {
722                 ret = (ret > 0 ? -ENOENT : ret);
723                 goto out;
724         }
725         read_extent_buffer(path.nodes[0], &buf,
726                         btrfs_item_ptr_offset(path.nodes[0], path.slots[0]),
727                         sizeof(buf));
728         btrfs_release_path(&path);
729
730         /*
731          * Create a new used space cache, which doesn't contain the reserved
732          * range
733          */
734         for (cache = first_cache_extent(&cctx->used); cache;
735              cache = next_cache_extent(cache)) {
736                 ret = add_cache_extent(&used_tmp, cache->start, cache->size);
737                 if (ret < 0)
738                         goto out;
739         }
740         ret = wipe_reserved_ranges(&used_tmp, 0, 0);
741         if (ret < 0)
742                 goto out;
743
744         /*
745          * Start from 1M, as 0~1M is reserved, and create_image_file_range()
746          * can't handle bytenr 0(will consider it as a hole)
747          */
748         cur = 1024 * 1024;
749         while (cur < size) {
750                 u64 len = size - cur;
751
752                 ret = create_image_file_range(trans, root, &used_tmp,
753                                                 &buf, ino, cur, &len, datacsum);
754                 if (ret < 0)
755                         goto out;
756                 cur += len;
757         }
758         /* Handle the reserved ranges */
759         ret = migrate_reserved_ranges(trans, root, &cctx->used, &buf, fd, ino,
760                                       cfg->num_bytes, datacsum);
761
762
763         key.objectid = ino;
764         key.type = BTRFS_INODE_ITEM_KEY;
765         key.offset = 0;
766         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
767         if (ret) {
768                 ret = (ret > 0 ? -ENOENT : ret);
769                 goto out;
770         }
771         btrfs_set_stack_inode_size(&buf, cfg->num_bytes);
772         write_extent_buffer(path.nodes[0], &buf,
773                         btrfs_item_ptr_offset(path.nodes[0], path.slots[0]),
774                         sizeof(buf));
775 out:
776         free_extent_cache_tree(&used_tmp);
777         btrfs_release_path(&path);
778         btrfs_commit_transaction(trans, root);
779         return ret;
780 }
781
782 static struct btrfs_root* link_subvol(struct btrfs_root *root,
783                 const char *base, u64 root_objectid)
784 {
785         struct btrfs_trans_handle *trans;
786         struct btrfs_fs_info *fs_info = root->fs_info;
787         struct btrfs_root *tree_root = fs_info->tree_root;
788         struct btrfs_root *new_root = NULL;
789         struct btrfs_path path;
790         struct btrfs_inode_item *inode_item;
791         struct extent_buffer *leaf;
792         struct btrfs_key key;
793         u64 dirid = btrfs_root_dirid(&root->root_item);
794         u64 index = 2;
795         char buf[BTRFS_NAME_LEN + 1]; /* for snprintf null */
796         int len;
797         int i;
798         int ret;
799
800         len = strlen(base);
801         if (len == 0 || len > BTRFS_NAME_LEN)
802                 return NULL;
803
804         btrfs_init_path(&path);
805         key.objectid = dirid;
806         key.type = BTRFS_DIR_INDEX_KEY;
807         key.offset = (u64)-1;
808
809         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
810         if (ret <= 0) {
811                 error("search for DIR_INDEX dirid %llu failed: %d",
812                                 (unsigned long long)dirid, ret);
813                 goto fail;
814         }
815
816         if (path.slots[0] > 0) {
817                 path.slots[0]--;
818                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
819                 if (key.objectid == dirid && key.type == BTRFS_DIR_INDEX_KEY)
820                         index = key.offset + 1;
821         }
822         btrfs_release_path(&path);
823
824         trans = btrfs_start_transaction(root, 1);
825         if (!trans) {
826                 error("unable to start transaction");
827                 goto fail;
828         }
829
830         key.objectid = dirid;
831         key.offset = 0;
832         key.type =  BTRFS_INODE_ITEM_KEY;
833
834         ret = btrfs_lookup_inode(trans, root, &path, &key, 1);
835         if (ret) {
836                 error("search for INODE_ITEM %llu failed: %d",
837                                 (unsigned long long)dirid, ret);
838                 goto fail;
839         }
840         leaf = path.nodes[0];
841         inode_item = btrfs_item_ptr(leaf, path.slots[0],
842                                     struct btrfs_inode_item);
843
844         key.objectid = root_objectid;
845         key.offset = (u64)-1;
846         key.type = BTRFS_ROOT_ITEM_KEY;
847
848         memcpy(buf, base, len);
849         for (i = 0; i < 1024; i++) {
850                 ret = btrfs_insert_dir_item(trans, root, buf, len,
851                                             dirid, &key, BTRFS_FT_DIR, index);
852                 if (ret != -EEXIST)
853                         break;
854                 len = snprintf(buf, ARRAY_SIZE(buf), "%s%d", base, i);
855                 if (len < 1 || len > BTRFS_NAME_LEN) {
856                         ret = -EINVAL;
857                         break;
858                 }
859         }
860         if (ret)
861                 goto fail;
862
863         btrfs_set_inode_size(leaf, inode_item, len * 2 +
864                              btrfs_inode_size(leaf, inode_item));
865         btrfs_mark_buffer_dirty(leaf);
866         btrfs_release_path(&path);
867
868         /* add the backref first */
869         ret = btrfs_add_root_ref(trans, tree_root, root_objectid,
870                                  BTRFS_ROOT_BACKREF_KEY,
871                                  root->root_key.objectid,
872                                  dirid, index, buf, len);
873         if (ret) {
874                 error("unable to add root backref for %llu: %d",
875                                 root->root_key.objectid, ret);
876                 goto fail;
877         }
878
879         /* now add the forward ref */
880         ret = btrfs_add_root_ref(trans, tree_root, root->root_key.objectid,
881                                  BTRFS_ROOT_REF_KEY, root_objectid,
882                                  dirid, index, buf, len);
883         if (ret) {
884                 error("unable to add root ref for %llu: %d",
885                                 root->root_key.objectid, ret);
886                 goto fail;
887         }
888
889         ret = btrfs_commit_transaction(trans, root);
890         if (ret) {
891                 error("transaction commit failed: %d", ret);
892                 goto fail;
893         }
894
895         new_root = btrfs_read_fs_root(fs_info, &key);
896         if (IS_ERR(new_root)) {
897                 error("unable to fs read root: %lu", PTR_ERR(new_root));
898                 new_root = NULL;
899         }
900 fail:
901         btrfs_init_path(&path);
902         return new_root;
903 }
904
905 static int create_subvol(struct btrfs_trans_handle *trans,
906                          struct btrfs_root *root, u64 root_objectid)
907 {
908         struct extent_buffer *tmp;
909         struct btrfs_root *new_root;
910         struct btrfs_key key;
911         struct btrfs_root_item root_item;
912         int ret;
913
914         ret = btrfs_copy_root(trans, root, root->node, &tmp,
915                               root_objectid);
916         if (ret)
917                 return ret;
918
919         memcpy(&root_item, &root->root_item, sizeof(root_item));
920         btrfs_set_root_bytenr(&root_item, tmp->start);
921         btrfs_set_root_level(&root_item, btrfs_header_level(tmp));
922         btrfs_set_root_generation(&root_item, trans->transid);
923         free_extent_buffer(tmp);
924
925         key.objectid = root_objectid;
926         key.type = BTRFS_ROOT_ITEM_KEY;
927         key.offset = trans->transid;
928         ret = btrfs_insert_root(trans, root->fs_info->tree_root,
929                                 &key, &root_item);
930
931         key.offset = (u64)-1;
932         new_root = btrfs_read_fs_root(root->fs_info, &key);
933         if (!new_root || IS_ERR(new_root)) {
934                 error("unable to fs read root: %lu", PTR_ERR(new_root));
935                 return PTR_ERR(new_root);
936         }
937
938         ret = btrfs_make_root_dir(trans, new_root, BTRFS_FIRST_FREE_OBJECTID);
939
940         return ret;
941 }
942
943 /*
944  * New make_btrfs() has handle system and meta chunks quite well.
945  * So only need to add remaining data chunks.
946  */
947 static int make_convert_data_block_groups(struct btrfs_trans_handle *trans,
948                                           struct btrfs_fs_info *fs_info,
949                                           struct btrfs_mkfs_config *cfg,
950                                           struct btrfs_convert_context *cctx)
951 {
952         struct btrfs_root *extent_root = fs_info->extent_root;
953         struct cache_tree *data_chunks = &cctx->data_chunks;
954         struct cache_extent *cache;
955         u64 max_chunk_size;
956         int ret = 0;
957
958         /*
959          * Don't create data chunk over 10% of the convert device
960          * And for single chunk, don't create chunk larger than 1G.
961          */
962         max_chunk_size = cfg->num_bytes / 10;
963         max_chunk_size = min((u64)(1024 * 1024 * 1024), max_chunk_size);
964         max_chunk_size = round_down(max_chunk_size, extent_root->sectorsize);
965
966         for (cache = first_cache_extent(data_chunks); cache;
967              cache = next_cache_extent(cache)) {
968                 u64 cur = cache->start;
969
970                 while (cur < cache->start + cache->size) {
971                         u64 len;
972                         u64 cur_backup = cur;
973
974                         len = min(max_chunk_size,
975                                   cache->start + cache->size - cur);
976                         ret = btrfs_alloc_data_chunk(trans, extent_root,
977                                         &cur_backup, len,
978                                         BTRFS_BLOCK_GROUP_DATA, 1);
979                         if (ret < 0)
980                                 break;
981                         ret = btrfs_make_block_group(trans, extent_root, 0,
982                                         BTRFS_BLOCK_GROUP_DATA,
983                                         BTRFS_FIRST_CHUNK_TREE_OBJECTID,
984                                         cur, len);
985                         if (ret < 0)
986                                 break;
987                         cur += len;
988                 }
989         }
990         return ret;
991 }
992
993 /*
994  * Init the temp btrfs to a operational status.
995  *
996  * It will fix the extent usage accounting(XXX: Do we really need?) and
997  * insert needed data chunks, to ensure all old fs data extents are covered
998  * by DATA chunks, preventing wrong chunks are allocated.
999  *
1000  * And also create convert image subvolume and relocation tree.
1001  * (XXX: Not need again?)
1002  * But the convert image subvolume is *NOT* linked to fs tree yet.
1003  */
1004 static int init_btrfs(struct btrfs_mkfs_config *cfg, struct btrfs_root *root,
1005                          struct btrfs_convert_context *cctx, int datacsum,
1006                          int packing, int noxattr)
1007 {
1008         struct btrfs_key location;
1009         struct btrfs_trans_handle *trans;
1010         struct btrfs_fs_info *fs_info = root->fs_info;
1011         int ret;
1012
1013         /*
1014          * Don't alloc any metadata/system chunk, as we don't want
1015          * any meta/sys chunk allcated before all data chunks are inserted.
1016          * Or we screw up the chunk layout just like the old implement.
1017          */
1018         fs_info->avoid_sys_chunk_alloc = 1;
1019         fs_info->avoid_meta_chunk_alloc = 1;
1020         trans = btrfs_start_transaction(root, 1);
1021         if (!trans) {
1022                 error("unable to start transaction");
1023                 ret = -EINVAL;
1024                 goto err;
1025         }
1026         ret = btrfs_fix_block_accounting(trans, root);
1027         if (ret)
1028                 goto err;
1029         ret = make_convert_data_block_groups(trans, fs_info, cfg, cctx);
1030         if (ret)
1031                 goto err;
1032         ret = btrfs_make_root_dir(trans, fs_info->tree_root,
1033                                   BTRFS_ROOT_TREE_DIR_OBJECTID);
1034         if (ret)
1035                 goto err;
1036         memcpy(&location, &root->root_key, sizeof(location));
1037         location.offset = (u64)-1;
1038         ret = btrfs_insert_dir_item(trans, fs_info->tree_root, "default", 7,
1039                                 btrfs_super_root_dir(fs_info->super_copy),
1040                                 &location, BTRFS_FT_DIR, 0);
1041         if (ret)
1042                 goto err;
1043         ret = btrfs_insert_inode_ref(trans, fs_info->tree_root, "default", 7,
1044                                 location.objectid,
1045                                 btrfs_super_root_dir(fs_info->super_copy), 0);
1046         if (ret)
1047                 goto err;
1048         btrfs_set_root_dirid(&fs_info->fs_root->root_item,
1049                              BTRFS_FIRST_FREE_OBJECTID);
1050
1051         /* subvol for fs image file */
1052         ret = create_subvol(trans, root, CONV_IMAGE_SUBVOL_OBJECTID);
1053         if (ret < 0) {
1054                 error("failed to create subvolume image root: %d", ret);
1055                 goto err;
1056         }
1057         /* subvol for data relocation tree */
1058         ret = create_subvol(trans, root, BTRFS_DATA_RELOC_TREE_OBJECTID);
1059         if (ret < 0) {
1060                 error("failed to create DATA_RELOC root: %d", ret);
1061                 goto err;
1062         }
1063
1064         ret = btrfs_commit_transaction(trans, root);
1065         fs_info->avoid_sys_chunk_alloc = 0;
1066         fs_info->avoid_meta_chunk_alloc = 0;
1067 err:
1068         return ret;
1069 }
1070
1071 /*
1072  * Migrate super block to its default position and zero 0 ~ 16k
1073  */
1074 static int migrate_super_block(int fd, u64 old_bytenr)
1075 {
1076         int ret;
1077         struct extent_buffer *buf;
1078         struct btrfs_super_block *super;
1079         u32 len;
1080         u32 bytenr;
1081
1082         buf = malloc(sizeof(*buf) + BTRFS_SUPER_INFO_SIZE);
1083         if (!buf)
1084                 return -ENOMEM;
1085
1086         buf->len = BTRFS_SUPER_INFO_SIZE;
1087         ret = pread(fd, buf->data, BTRFS_SUPER_INFO_SIZE, old_bytenr);
1088         if (ret != BTRFS_SUPER_INFO_SIZE)
1089                 goto fail;
1090
1091         super = (struct btrfs_super_block *)buf->data;
1092         BUG_ON(btrfs_super_bytenr(super) != old_bytenr);
1093         btrfs_set_super_bytenr(super, BTRFS_SUPER_INFO_OFFSET);
1094
1095         csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
1096         ret = pwrite(fd, buf->data, BTRFS_SUPER_INFO_SIZE,
1097                 BTRFS_SUPER_INFO_OFFSET);
1098         if (ret != BTRFS_SUPER_INFO_SIZE)
1099                 goto fail;
1100
1101         ret = fsync(fd);
1102         if (ret)
1103                 goto fail;
1104
1105         memset(buf->data, 0, BTRFS_SUPER_INFO_SIZE);
1106         for (bytenr = 0; bytenr < BTRFS_SUPER_INFO_OFFSET; ) {
1107                 len = BTRFS_SUPER_INFO_OFFSET - bytenr;
1108                 if (len > BTRFS_SUPER_INFO_SIZE)
1109                         len = BTRFS_SUPER_INFO_SIZE;
1110                 ret = pwrite(fd, buf->data, len, bytenr);
1111                 if (ret != len) {
1112                         fprintf(stderr, "unable to zero fill device\n");
1113                         break;
1114                 }
1115                 bytenr += len;
1116         }
1117         ret = 0;
1118         fsync(fd);
1119 fail:
1120         free(buf);
1121         if (ret > 0)
1122                 ret = -1;
1123         return ret;
1124 }
1125
1126 static int prepare_system_chunk_sb(struct btrfs_super_block *super)
1127 {
1128         struct btrfs_chunk *chunk;
1129         struct btrfs_disk_key *key;
1130         u32 sectorsize = btrfs_super_sectorsize(super);
1131
1132         key = (struct btrfs_disk_key *)(super->sys_chunk_array);
1133         chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
1134                                        sizeof(struct btrfs_disk_key));
1135
1136         btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
1137         btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
1138         btrfs_set_disk_key_offset(key, 0);
1139
1140         btrfs_set_stack_chunk_length(chunk, btrfs_super_total_bytes(super));
1141         btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
1142         btrfs_set_stack_chunk_stripe_len(chunk, BTRFS_STRIPE_LEN);
1143         btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
1144         btrfs_set_stack_chunk_io_align(chunk, sectorsize);
1145         btrfs_set_stack_chunk_io_width(chunk, sectorsize);
1146         btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
1147         btrfs_set_stack_chunk_num_stripes(chunk, 1);
1148         btrfs_set_stack_chunk_sub_stripes(chunk, 0);
1149         chunk->stripe.devid = super->dev_item.devid;
1150         btrfs_set_stack_stripe_offset(&chunk->stripe, 0);
1151         memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
1152         btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
1153         return 0;
1154 }
1155
1156 static int convert_open_fs(const char *devname,
1157                            struct btrfs_convert_context *cctx)
1158 {
1159         int i;
1160
1161         memset(cctx, 0, sizeof(*cctx));
1162
1163         for (i = 0; i < ARRAY_SIZE(convert_operations); i++) {
1164                 int ret = convert_operations[i]->open_fs(cctx, devname);
1165
1166                 if (ret == 0) {
1167                         cctx->convert_ops = convert_operations[i];
1168                         return ret;
1169                 }
1170         }
1171
1172         error("no file system found to convert");
1173         return -1;
1174 }
1175
1176 static int do_convert(const char *devname, int datacsum, int packing,
1177                 int noxattr, u32 nodesize, int copylabel, const char *fslabel,
1178                 int progress, u64 features)
1179 {
1180         int ret;
1181         int fd = -1;
1182         u32 blocksize;
1183         u64 total_bytes;
1184         struct btrfs_root *root;
1185         struct btrfs_root *image_root;
1186         struct btrfs_convert_context cctx;
1187         struct btrfs_key key;
1188         char *subvol_name = NULL;
1189         struct task_ctx ctx;
1190         char features_buf[64];
1191         struct btrfs_mkfs_config mkfs_cfg;
1192
1193         init_convert_context(&cctx);
1194         ret = convert_open_fs(devname, &cctx);
1195         if (ret)
1196                 goto fail;
1197         ret = convert_check_state(&cctx);
1198         if (ret)
1199                 warning(
1200                 "source filesystem is not clean, running filesystem check is recommended");
1201         ret = convert_read_used_space(&cctx);
1202         if (ret)
1203                 goto fail;
1204
1205         blocksize = cctx.blocksize;
1206         total_bytes = (u64)blocksize * (u64)cctx.block_count;
1207         if (blocksize < 4096) {
1208                 error("block size is too small: %u < 4096", blocksize);
1209                 goto fail;
1210         }
1211         if (btrfs_check_nodesize(nodesize, blocksize, features))
1212                 goto fail;
1213         fd = open(devname, O_RDWR);
1214         if (fd < 0) {
1215                 error("unable to open %s: %s", devname, strerror(errno));
1216                 goto fail;
1217         }
1218         btrfs_parse_features_to_string(features_buf, features);
1219         if (features == BTRFS_MKFS_DEFAULT_FEATURES)
1220                 strcat(features_buf, " (default)");
1221
1222         printf("create btrfs filesystem:\n");
1223         printf("\tblocksize: %u\n", blocksize);
1224         printf("\tnodesize:  %u\n", nodesize);
1225         printf("\tfeatures:  %s\n", features_buf);
1226
1227         mkfs_cfg.label = cctx.volume_name;
1228         mkfs_cfg.num_bytes = total_bytes;
1229         mkfs_cfg.nodesize = nodesize;
1230         mkfs_cfg.sectorsize = blocksize;
1231         mkfs_cfg.stripesize = blocksize;
1232         mkfs_cfg.features = features;
1233         /* New convert need these space */
1234         memset(mkfs_cfg.chunk_uuid, 0, BTRFS_UUID_UNPARSED_SIZE);
1235         memset(mkfs_cfg.fs_uuid, 0, BTRFS_UUID_UNPARSED_SIZE);
1236
1237         ret = make_convert_btrfs(fd, &mkfs_cfg, &cctx);
1238         if (ret) {
1239                 error("unable to create initial ctree: %s", strerror(-ret));
1240                 goto fail;
1241         }
1242
1243         root = open_ctree_fd(fd, devname, mkfs_cfg.super_bytenr,
1244                              OPEN_CTREE_WRITES | OPEN_CTREE_FS_PARTIAL);
1245         if (!root) {
1246                 error("unable to open ctree");
1247                 goto fail;
1248         }
1249         ret = init_btrfs(&mkfs_cfg, root, &cctx, datacsum, packing, noxattr);
1250         if (ret) {
1251                 error("unable to setup the root tree: %d", ret);
1252                 goto fail;
1253         }
1254
1255         printf("creating %s image file\n", cctx.convert_ops->name);
1256         ret = asprintf(&subvol_name, "%s_saved", cctx.convert_ops->name);
1257         if (ret < 0) {
1258                 error("memory allocation failure for subvolume name: %s_saved",
1259                         cctx.convert_ops->name);
1260                 goto fail;
1261         }
1262         key.objectid = CONV_IMAGE_SUBVOL_OBJECTID;
1263         key.offset = (u64)-1;
1264         key.type = BTRFS_ROOT_ITEM_KEY;
1265         image_root = btrfs_read_fs_root(root->fs_info, &key);
1266         if (!image_root) {
1267                 error("unable to create image subvolume");
1268                 goto fail;
1269         }
1270         ret = create_image(image_root, &mkfs_cfg, &cctx, fd,
1271                               mkfs_cfg.num_bytes, "image", datacsum);
1272         if (ret) {
1273                 error("failed to create %s/image: %d", subvol_name, ret);
1274                 goto fail;
1275         }
1276
1277         printf("creating btrfs metadata");
1278         ctx.max_copy_inodes = (cctx.inodes_count - cctx.free_inodes_count);
1279         ctx.cur_copy_inodes = 0;
1280
1281         if (progress) {
1282                 ctx.info = task_init(print_copied_inodes, after_copied_inodes,
1283                                      &ctx);
1284                 task_start(ctx.info);
1285         }
1286         ret = copy_inodes(&cctx, root, datacsum, packing, noxattr, &ctx);
1287         if (ret) {
1288                 error("error during copy_inodes %d", ret);
1289                 goto fail;
1290         }
1291         if (progress) {
1292                 task_stop(ctx.info);
1293                 task_deinit(ctx.info);
1294         }
1295
1296         image_root = link_subvol(root, subvol_name, CONV_IMAGE_SUBVOL_OBJECTID);
1297         if (!image_root) {
1298                 error("unable to link subvolume %s", subvol_name);
1299                 goto fail;
1300         }
1301
1302         free(subvol_name);
1303
1304         memset(root->fs_info->super_copy->label, 0, BTRFS_LABEL_SIZE);
1305         if (copylabel == 1) {
1306                 __strncpy_null(root->fs_info->super_copy->label,
1307                                 cctx.volume_name, BTRFS_LABEL_SIZE - 1);
1308                 printf("copy label '%s'\n", root->fs_info->super_copy->label);
1309         } else if (copylabel == -1) {
1310                 strcpy(root->fs_info->super_copy->label, fslabel);
1311                 printf("set label to '%s'\n", fslabel);
1312         }
1313
1314         ret = close_ctree(root);
1315         if (ret) {
1316                 error("close_ctree failed: %d", ret);
1317                 goto fail;
1318         }
1319         convert_close_fs(&cctx);
1320         clean_convert_context(&cctx);
1321
1322         /*
1323          * If this step succeed, we get a mountable btrfs. Otherwise
1324          * the source fs is left unchanged.
1325          */
1326         ret = migrate_super_block(fd, mkfs_cfg.super_bytenr);
1327         if (ret) {
1328                 error("unable to migrate super block: %d", ret);
1329                 goto fail;
1330         }
1331
1332         root = open_ctree_fd(fd, devname, 0,
1333                         OPEN_CTREE_WRITES | OPEN_CTREE_FS_PARTIAL);
1334         if (!root) {
1335                 error("unable to open ctree for finalization");
1336                 goto fail;
1337         }
1338         root->fs_info->finalize_on_close = 1;
1339         close_ctree(root);
1340         close(fd);
1341
1342         printf("conversion complete");
1343         return 0;
1344 fail:
1345         clean_convert_context(&cctx);
1346         if (fd != -1)
1347                 close(fd);
1348         warning(
1349 "an error occurred during conversion, filesystem is partially created but not finalized and not mountable");
1350         return -1;
1351 }
1352
1353 /*
1354  * Check if a non 1:1 mapped chunk can be rolled back.
1355  * For new convert, it's OK while for old convert it's not.
1356  */
1357 static int may_rollback_chunk(struct btrfs_fs_info *fs_info, u64 bytenr)
1358 {
1359         struct btrfs_block_group_cache *bg;
1360         struct btrfs_key key;
1361         struct btrfs_path path;
1362         struct btrfs_root *extent_root = fs_info->extent_root;
1363         u64 bg_start;
1364         u64 bg_end;
1365         int ret;
1366
1367         bg = btrfs_lookup_first_block_group(fs_info, bytenr);
1368         if (!bg)
1369                 return -ENOENT;
1370         bg_start = bg->key.objectid;
1371         bg_end = bg->key.objectid + bg->key.offset;
1372
1373         key.objectid = bg_end;
1374         key.type = BTRFS_METADATA_ITEM_KEY;
1375         key.offset = 0;
1376         btrfs_init_path(&path);
1377
1378         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
1379         if (ret < 0)
1380                 return ret;
1381
1382         while (1) {
1383                 struct btrfs_extent_item *ei;
1384
1385                 ret = btrfs_previous_extent_item(extent_root, &path, bg_start);
1386                 if (ret > 0) {
1387                         ret = 0;
1388                         break;
1389                 }
1390                 if (ret < 0)
1391                         break;
1392
1393                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
1394                 if (key.type == BTRFS_METADATA_ITEM_KEY)
1395                         continue;
1396                 /* Now it's EXTENT_ITEM_KEY only */
1397                 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
1398                                     struct btrfs_extent_item);
1399                 /*
1400                  * Found data extent, means this is old convert must follow 1:1
1401                  * mapping.
1402                  */
1403                 if (btrfs_extent_flags(path.nodes[0], ei)
1404                                 & BTRFS_EXTENT_FLAG_DATA) {
1405                         ret = -EINVAL;
1406                         break;
1407                 }
1408         }
1409         btrfs_release_path(&path);
1410         return ret;
1411 }
1412
1413 static int may_rollback(struct btrfs_root *root)
1414 {
1415         struct btrfs_fs_info *info = root->fs_info;
1416         struct btrfs_multi_bio *multi = NULL;
1417         u64 bytenr;
1418         u64 length;
1419         u64 physical;
1420         u64 total_bytes;
1421         int num_stripes;
1422         int ret;
1423
1424         if (btrfs_super_num_devices(info->super_copy) != 1)
1425                 goto fail;
1426
1427         bytenr = BTRFS_SUPER_INFO_OFFSET;
1428         total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
1429
1430         while (1) {
1431                 ret = btrfs_map_block(&info->mapping_tree, WRITE, bytenr,
1432                                       &length, &multi, 0, NULL);
1433                 if (ret) {
1434                         if (ret == -ENOENT) {
1435                                 /* removed block group at the tail */
1436                                 if (length == (u64)-1)
1437                                         break;
1438
1439                                 /* removed block group in the middle */
1440                                 goto next;
1441                         }
1442                         goto fail;
1443                 }
1444
1445                 num_stripes = multi->num_stripes;
1446                 physical = multi->stripes[0].physical;
1447                 free(multi);
1448
1449                 if (num_stripes != 1) {
1450                         error("num stripes for bytenr %llu is not 1", bytenr);
1451                         goto fail;
1452                 }
1453
1454                 /*
1455                  * Extra check for new convert, as metadata chunk from new
1456                  * convert is much more free than old convert, it doesn't need
1457                  * to do 1:1 mapping.
1458                  */
1459                 if (physical != bytenr) {
1460                         /*
1461                          * Check if it's a metadata chunk and has only metadata
1462                          * extent.
1463                          */
1464                         ret = may_rollback_chunk(info, bytenr);
1465                         if (ret < 0)
1466                                 goto fail;
1467                 }
1468 next:
1469                 bytenr += length;
1470                 if (bytenr >= total_bytes)
1471                         break;
1472         }
1473         return 0;
1474 fail:
1475         return -1;
1476 }
1477
1478 static int do_rollback(const char *devname)
1479 {
1480         int fd = -1;
1481         int ret;
1482         int i;
1483         struct btrfs_root *root;
1484         struct btrfs_root *image_root;
1485         struct btrfs_root *chunk_root;
1486         struct btrfs_dir_item *dir;
1487         struct btrfs_inode_item *inode;
1488         struct btrfs_file_extent_item *fi;
1489         struct btrfs_trans_handle *trans;
1490         struct extent_buffer *leaf;
1491         struct btrfs_block_group_cache *cache1;
1492         struct btrfs_block_group_cache *cache2;
1493         struct btrfs_key key;
1494         struct btrfs_path path;
1495         struct extent_io_tree io_tree;
1496         char *buf = NULL;
1497         char *name;
1498         u64 bytenr;
1499         u64 num_bytes;
1500         u64 root_dir;
1501         u64 objectid;
1502         u64 offset;
1503         u64 start;
1504         u64 end;
1505         u64 sb_bytenr;
1506         u64 first_free;
1507         u64 total_bytes;
1508         u32 sectorsize;
1509
1510         extent_io_tree_init(&io_tree);
1511
1512         fd = open(devname, O_RDWR);
1513         if (fd < 0) {
1514                 error("unable to open %s: %s", devname, strerror(errno));
1515                 goto fail;
1516         }
1517         root = open_ctree_fd(fd, devname, 0, OPEN_CTREE_WRITES);
1518         if (!root) {
1519                 error("unable to open ctree");
1520                 goto fail;
1521         }
1522         ret = may_rollback(root);
1523         if (ret < 0) {
1524                 error("unable to do rollback: %d", ret);
1525                 goto fail;
1526         }
1527
1528         sectorsize = root->sectorsize;
1529         buf = malloc(sectorsize);
1530         if (!buf) {
1531                 error("unable to allocate memory");
1532                 goto fail;
1533         }
1534
1535         btrfs_init_path(&path);
1536
1537         key.objectid = CONV_IMAGE_SUBVOL_OBJECTID;
1538         key.type = BTRFS_ROOT_BACKREF_KEY;
1539         key.offset = BTRFS_FS_TREE_OBJECTID;
1540         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path, 0,
1541                                 0);
1542         btrfs_release_path(&path);
1543         if (ret > 0) {
1544                 error("unable to convert ext2 image subvolume, is it deleted?");
1545                 goto fail;
1546         } else if (ret < 0) {
1547                 error("unable to open ext2_saved, id %llu: %s",
1548                         (unsigned long long)key.objectid, strerror(-ret));
1549                 goto fail;
1550         }
1551
1552         key.objectid = CONV_IMAGE_SUBVOL_OBJECTID;
1553         key.type = BTRFS_ROOT_ITEM_KEY;
1554         key.offset = (u64)-1;
1555         image_root = btrfs_read_fs_root(root->fs_info, &key);
1556         if (!image_root || IS_ERR(image_root)) {
1557                 error("unable to open subvolume %llu: %ld",
1558                         (unsigned long long)key.objectid, PTR_ERR(image_root));
1559                 goto fail;
1560         }
1561
1562         name = "image";
1563         root_dir = btrfs_root_dirid(&root->root_item);
1564         dir = btrfs_lookup_dir_item(NULL, image_root, &path,
1565                                    root_dir, name, strlen(name), 0);
1566         if (!dir || IS_ERR(dir)) {
1567                 error("unable to find file %s: %ld", name, PTR_ERR(dir));
1568                 goto fail;
1569         }
1570         leaf = path.nodes[0];
1571         btrfs_dir_item_key_to_cpu(leaf, dir, &key);
1572         btrfs_release_path(&path);
1573
1574         objectid = key.objectid;
1575
1576         ret = btrfs_lookup_inode(NULL, image_root, &path, &key, 0);
1577         if (ret) {
1578                 error("unable to find inode item: %d", ret);
1579                 goto fail;
1580         }
1581         leaf = path.nodes[0];
1582         inode = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_inode_item);
1583         total_bytes = btrfs_inode_size(leaf, inode);
1584         btrfs_release_path(&path);
1585
1586         key.objectid = objectid;
1587         key.offset = 0;
1588         key.type = BTRFS_EXTENT_DATA_KEY;
1589         ret = btrfs_search_slot(NULL, image_root, &key, &path, 0, 0);
1590         if (ret != 0) {
1591                 error("unable to find first file extent: %d", ret);
1592                 btrfs_release_path(&path);
1593                 goto fail;
1594         }
1595
1596         /* build mapping tree for the relocated blocks */
1597         for (offset = 0; offset < total_bytes; ) {
1598                 leaf = path.nodes[0];
1599                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1600                         ret = btrfs_next_leaf(root, &path);
1601                         if (ret != 0)
1602                                 break;  
1603                         continue;
1604                 }
1605
1606                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1607                 if (key.objectid != objectid || key.offset != offset ||
1608                     key.type != BTRFS_EXTENT_DATA_KEY)
1609                         break;
1610
1611                 fi = btrfs_item_ptr(leaf, path.slots[0],
1612                                     struct btrfs_file_extent_item);
1613                 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
1614                         break;
1615                 if (btrfs_file_extent_compression(leaf, fi) ||
1616                     btrfs_file_extent_encryption(leaf, fi) ||
1617                     btrfs_file_extent_other_encoding(leaf, fi))
1618                         break;
1619
1620                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1621                 /* skip holes and direct mapped extents */
1622                 if (bytenr == 0 || bytenr == offset)
1623                         goto next_extent;
1624
1625                 bytenr += btrfs_file_extent_offset(leaf, fi);
1626                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
1627
1628                 cache1 = btrfs_lookup_block_group(root->fs_info, offset);
1629                 cache2 = btrfs_lookup_block_group(root->fs_info,
1630                                                   offset + num_bytes - 1);
                /*
                 * Here we must take both the old and the new convert
                 * behavior into consideration.
                 * For the old convert case there is, sadly, no consistent
                 * chunk type that covers the extent: META/DATA/SYS are all
                 * possible. Just ensure the relocated one is in a SYS chunk.
                 * For the new convert case, they are all covered by a DATA
                 * chunk.
                 *
                 * So there is no valid chunk type check for it now.
                 */
1641                 if (cache1 != cache2)
1642                         break;
1643
1644                 set_extent_bits(&io_tree, offset, offset + num_bytes - 1,
1645                                 EXTENT_LOCKED, GFP_NOFS);
1646                 set_state_private(&io_tree, offset, bytenr);
1647 next_extent:
1648                 offset += btrfs_file_extent_num_bytes(leaf, fi);
1649                 path.slots[0]++;
1650         }
1651         btrfs_release_path(&path);
1652
1653         if (offset < total_bytes) {
1654                 error("unable to build extent mapping (offset %llu, total_bytes %llu)",
1655                                 (unsigned long long)offset,
1656                                 (unsigned long long)total_bytes);
1657                 error("converted filesystem after balance is unable to rollback");
1658                 goto fail;
1659         }
1660
1661         first_free = BTRFS_SUPER_INFO_OFFSET + 2 * sectorsize - 1;
1662         first_free &= ~((u64)sectorsize - 1);
1663         /* backup for extent #0 should exist */
1664         if(!test_range_bit(&io_tree, 0, first_free - 1, EXTENT_LOCKED, 1)) {
1665                 error("no backup for the first extent");
1666                 goto fail;
1667         }
1668         /* force no allocation from system block group */
1669         root->fs_info->system_allocs = -1;
1670         trans = btrfs_start_transaction(root, 1);
1671         if (!trans) {
1672                 error("unable to start transaction");
1673                 goto fail;
1674         }
1675         /*
1676          * recow the whole chunk tree, this will remove all chunk tree blocks
1677          * from system block group
1678          */
1679         chunk_root = root->fs_info->chunk_root;
1680         memset(&key, 0, sizeof(key));
1681         while (1) {
1682                 ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 1);
1683                 if (ret < 0)
1684                         break;
1685
1686                 ret = btrfs_next_leaf(chunk_root, &path);
1687                 if (ret)
1688                         break;
1689
1690                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
1691                 btrfs_release_path(&path);
1692         }
1693         btrfs_release_path(&path);
1694
1695         offset = 0;
1696         num_bytes = 0;
1697         while(1) {
1698                 cache1 = btrfs_lookup_block_group(root->fs_info, offset);
1699                 if (!cache1)
1700                         break;
1701
1702                 if (cache1->flags & BTRFS_BLOCK_GROUP_SYSTEM)
1703                         num_bytes += btrfs_block_group_used(&cache1->item);
1704
1705                 offset = cache1->key.objectid + cache1->key.offset;
1706         }
1707         /* only extent #0 left in system block group? */
1708         if (num_bytes > first_free) {
1709                 error(
1710         "unable to empty system block group (num_bytes %llu, first_free %llu",
1711                                 (unsigned long long)num_bytes,
1712                                 (unsigned long long)first_free);
1713                 goto fail;
1714         }
1715         /* create a system chunk that maps the whole device */
1716         ret = prepare_system_chunk_sb(root->fs_info->super_copy);
1717         if (ret) {
1718                 error("unable to update system chunk: %d", ret);
1719                 goto fail;
1720         }
1721
1722         ret = btrfs_commit_transaction(trans, root);
1723         if (ret) {
1724                 error("transaction commit failed: %d", ret);
1725                 goto fail;
1726         }
1727
1728         ret = close_ctree(root);
1729         if (ret) {
1730                 error("close_ctree failed: %d", ret);
1731                 goto fail;
1732         }
1733
1734         /* zero btrfs super block mirrors */
1735         memset(buf, 0, sectorsize);
1736         for (i = 1 ; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1737                 bytenr = btrfs_sb_offset(i);
1738                 if (bytenr >= total_bytes)
1739                         break;
1740                 ret = pwrite(fd, buf, sectorsize, bytenr);
1741                 if (ret != sectorsize) {
1742                         error("zeroing superblock mirror %d failed: %d",
1743                                         i, ret);
1744                         goto fail;
1745                 }
1746         }
1747
1748         sb_bytenr = (u64)-1;
1749         /* copy all relocated blocks back */
1750         while(1) {
1751                 ret = find_first_extent_bit(&io_tree, 0, &start, &end,
1752                                             EXTENT_LOCKED);
1753                 if (ret)
1754                         break;
1755
1756                 ret = get_state_private(&io_tree, start, &bytenr);
1757                 BUG_ON(ret);
1758
1759                 clear_extent_bits(&io_tree, start, end, EXTENT_LOCKED,
1760                                   GFP_NOFS);
1761
1762                 while (start <= end) {
1763                         if (start == BTRFS_SUPER_INFO_OFFSET) {
1764                                 sb_bytenr = bytenr;
1765                                 goto next_sector;
1766                         }
1767                         ret = pread(fd, buf, sectorsize, bytenr);
1768                         if (ret < 0) {
1769                                 error("reading superblock at %llu failed: %d",
1770                                                 (unsigned long long)bytenr, ret);
1771                                 goto fail;
1772                         }
1773                         BUG_ON(ret != sectorsize);
1774                         ret = pwrite(fd, buf, sectorsize, start);
1775                         if (ret < 0) {
1776                                 error("writing superblock at %llu failed: %d",
1777                                                 (unsigned long long)start, ret);
1778                                 goto fail;
1779                         }
1780                         BUG_ON(ret != sectorsize);
1781 next_sector:
1782                         start += sectorsize;
1783                         bytenr += sectorsize;
1784                 }
1785         }
1786
1787         ret = fsync(fd);
1788         if (ret < 0) {
1789                 error("fsync failed: %s", strerror(errno));
1790                 goto fail;
1791         }
1792         /*
1793          * finally, overwrite btrfs super block.
1794          */
1795         ret = pread(fd, buf, sectorsize, sb_bytenr);
1796         if (ret < 0) {
1797                 error("reading primary superblock failed: %s",
1798                                 strerror(errno));
1799                 goto fail;
1800         }
1801         BUG_ON(ret != sectorsize);
1802         ret = pwrite(fd, buf, sectorsize, BTRFS_SUPER_INFO_OFFSET);
1803         if (ret < 0) {
1804                 error("writing primary superblock failed: %s",
1805                                 strerror(errno));
1806                 goto fail;
1807         }
1808         BUG_ON(ret != sectorsize);
1809         ret = fsync(fd);
1810         if (ret < 0) {
1811                 error("fsync failed: %s", strerror(errno));
1812                 goto fail;
1813         }
1814
1815         close(fd);
1816         free(buf);
1817         extent_io_tree_cleanup(&io_tree);
1818         printf("rollback complete\n");
1819         return 0;
1820
1821 fail:
1822         if (fd != -1)
1823                 close(fd);
1824         free(buf);
1825         error("rollback aborted");
1826         return -1;
1827 }
1828
1829 static void print_usage(void)
1830 {
1831         printf("usage: btrfs-convert [options] device\n");
1832         printf("options:\n");
1833         printf("\t-d|--no-datasum        disable data checksum, sets NODATASUM\n");
1834         printf("\t-i|--no-xattr          ignore xattrs and ACLs\n");
1835         printf("\t-n|--no-inline         disable inlining of small files to metadata\n");
1836         printf("\t-N|--nodesize SIZE     set filesystem metadata nodesize\n");
1837         printf("\t-r|--rollback          roll back to the original filesystem\n");
1838         printf("\t-l|--label LABEL       set filesystem label\n");
1839         printf("\t-L|--copy-label        use label from converted filesystem\n");
1840         printf("\t-p|--progress          show converting progress (default)\n");
1841         printf("\t-O|--features LIST     comma separated list of filesystem features\n");
1842         printf("\t--no-progress          show only overview, not the detailed progress\n");
1843         printf("\n");
1844         printf("Supported filesystems:\n");
1845         printf("\text2/3/4: %s\n", BTRFSCONVERT_EXT2 ? "yes" : "no");
1846 }
1847
1848 int main(int argc, char *argv[])
1849 {
1850         int ret;
1851         int packing = 1;
1852         int noxattr = 0;
1853         int datacsum = 1;
1854         u32 nodesize = max_t(u32, sysconf(_SC_PAGESIZE),
1855                         BTRFS_MKFS_DEFAULT_NODE_SIZE);
1856         int rollback = 0;
1857         int copylabel = 0;
1858         int usage_error = 0;
1859         int progress = 1;
1860         char *file;
1861         char fslabel[BTRFS_LABEL_SIZE];
1862         u64 features = BTRFS_MKFS_DEFAULT_FEATURES;
1863
1864         while(1) {
1865                 enum { GETOPT_VAL_NO_PROGRESS = 256 };
1866                 static const struct option long_options[] = {
1867                         { "no-progress", no_argument, NULL,
1868                                 GETOPT_VAL_NO_PROGRESS },
1869                         { "no-datasum", no_argument, NULL, 'd' },
1870                         { "no-inline", no_argument, NULL, 'n' },
1871                         { "no-xattr", no_argument, NULL, 'i' },
1872                         { "rollback", no_argument, NULL, 'r' },
1873                         { "features", required_argument, NULL, 'O' },
1874                         { "progress", no_argument, NULL, 'p' },
1875                         { "label", required_argument, NULL, 'l' },
1876                         { "copy-label", no_argument, NULL, 'L' },
1877                         { "nodesize", required_argument, NULL, 'N' },
1878                         { "help", no_argument, NULL, GETOPT_VAL_HELP},
1879                         { NULL, 0, NULL, 0 }
1880                 };
1881                 int c = getopt_long(argc, argv, "dinN:rl:LpO:", long_options, NULL);
1882
1883                 if (c < 0)
1884                         break;
1885                 switch(c) {
1886                         case 'd':
1887                                 datacsum = 0;
1888                                 break;
1889                         case 'i':
1890                                 noxattr = 1;
1891                                 break;
1892                         case 'n':
1893                                 packing = 0;
1894                                 break;
1895                         case 'N':
1896                                 nodesize = parse_size(optarg);
1897                                 break;
1898                         case 'r':
1899                                 rollback = 1;
1900                                 break;
1901                         case 'l':
1902                                 copylabel = -1;
1903                                 if (strlen(optarg) >= BTRFS_LABEL_SIZE) {
1904                                         warning(
1905                                         "label too long, trimmed to %d bytes",
1906                                                 BTRFS_LABEL_SIZE - 1);
1907                                 }
1908                                 __strncpy_null(fslabel, optarg, BTRFS_LABEL_SIZE - 1);
1909                                 break;
1910                         case 'L':
1911                                 copylabel = 1;
1912                                 break;
1913                         case 'p':
1914                                 progress = 1;
1915                                 break;
1916                         case 'O': {
1917                                 char *orig = strdup(optarg);
1918                                 char *tmp = orig;
1919
1920                                 tmp = btrfs_parse_fs_features(tmp, &features);
1921                                 if (tmp) {
1922                                         error("unrecognized filesystem feature: %s",
1923                                                         tmp);
1924                                         free(orig);
1925                                         exit(1);
1926                                 }
1927                                 free(orig);
1928                                 if (features & BTRFS_FEATURE_LIST_ALL) {
1929                                         btrfs_list_all_fs_features(
1930                                                 ~BTRFS_CONVERT_ALLOWED_FEATURES);
1931                                         exit(0);
1932                                 }
1933                                 if (features & ~BTRFS_CONVERT_ALLOWED_FEATURES) {
1934                                         char buf[64];
1935
1936                                         btrfs_parse_features_to_string(buf,
1937                                                 features & ~BTRFS_CONVERT_ALLOWED_FEATURES);
1938                                         error("features not allowed for convert: %s",
1939                                                 buf);
1940                                         exit(1);
1941                                 }
1942
1943                                 break;
1944                                 }
1945                         case GETOPT_VAL_NO_PROGRESS:
1946                                 progress = 0;
1947                                 break;
1948                         case GETOPT_VAL_HELP:
1949                         default:
1950                                 print_usage();
1951                                 return c != GETOPT_VAL_HELP;
1952                 }
1953         }
1954         set_argv0(argv);
1955         if (check_argc_exact(argc - optind, 1)) {
1956                 print_usage();
1957                 return 1;
1958         }
1959
1960         if (rollback && (!datacsum || noxattr || !packing)) {
1961                 fprintf(stderr,
1962                         "Usage error: -d, -i, -n options do not apply to rollback\n");
1963                 usage_error++;
1964         }
1965
1966         if (usage_error) {
1967                 print_usage();
1968                 return 1;
1969         }
1970
1971         file = argv[optind];
1972         ret = check_mounted(file);
1973         if (ret < 0) {
1974                 error("could not check mount status: %s", strerror(-ret));
1975                 return 1;
1976         } else if (ret) {
1977                 error("%s is mounted", file);
1978                 return 1;
1979         }
1980
1981         if (rollback) {
1982                 ret = do_rollback(file);
1983         } else {
1984                 ret = do_convert(file, datacsum, packing, noxattr, nodesize,
1985                                 copylabel, fslabel, progress, features);
1986         }
1987         if (ret)
1988                 return 1;
1989         return 0;
1990 }