btrfs-progs: convert: use wider types for inode counts for progress reports
[platform/upstream/btrfs-progs.git] / convert / main.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include "kerncompat.h"
20
#include <sys/ioctl.h>
#include <sys/mount.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <uuid/uuid.h>
#include <linux/limits.h>
#include <getopt.h>
32
33 #include "ctree.h"
34 #include "disk-io.h"
35 #include "volumes.h"
36 #include "transaction.h"
37 #include "crc32c.h"
38 #include "utils.h"
39 #include "task-utils.h"
40 #include "help.h"
41 #include "mkfs/common.h"
42 #include "convert/common.h"
43 #include "convert/source-fs.h"
44 #include "fsfeatures.h"
45
/*
 * Operations table of the ext2 backend.  It is only referenced here, so it
 * must be a pure declaration: without "extern" this line is a tentative
 * definition, which clashes at link time with the initialized definition
 * (presumably provided by the ext2 source file) when building with
 * -fno-common.
 */
extern const struct btrfs_convert_operations ext2_convert_ops;

/* Source filesystem backends that were enabled at build time. */
static const struct btrfs_convert_operations *convert_operations[] = {
#if BTRFSCONVERT_EXT2
        &ext2_convert_ops,
#endif
};
53
54 static void *print_copied_inodes(void *p)
55 {
56         struct task_ctx *priv = p;
57         const char work_indicator[] = { '.', 'o', 'O', 'o' };
58         u64 count = 0;
59
60         task_period_start(priv->info, 1000 /* 1s */);
61         while (1) {
62                 count++;
63                 printf("copy inodes [%c] [%10llu/%10llu]\r",
64                        work_indicator[count % 4],
65                        (unsigned long long)priv->cur_copy_inodes,
66                        (unsigned long long)priv->max_copy_inodes);
67                 fflush(stdout);
68                 task_period_wait(priv->info);
69         }
70
71         return NULL;
72 }
73
/*
 * Terminate the progress line: emit a newline, flush stdout and report
 * success.  @p is unused.
 */
static int after_copied_inodes(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
81
82 static inline int copy_inodes(struct btrfs_convert_context *cctx,
83                               struct btrfs_root *root, int datacsum,
84                               int packing, int noxattr, struct task_ctx *p)
85 {
86         return cctx->convert_ops->copy_inodes(cctx, root, datacsum, packing,
87                                              noxattr, p);
88 }
89
90 static inline void convert_close_fs(struct btrfs_convert_context *cctx)
91 {
92         cctx->convert_ops->close_fs(cctx);
93 }
94
95 static inline int convert_check_state(struct btrfs_convert_context *cctx)
96 {
97         return cctx->convert_ops->check_state(cctx);
98 }
99
100 static int csum_disk_extent(struct btrfs_trans_handle *trans,
101                             struct btrfs_root *root,
102                             u64 disk_bytenr, u64 num_bytes)
103 {
104         u32 blocksize = root->sectorsize;
105         u64 offset;
106         char *buffer;
107         int ret = 0;
108
109         buffer = malloc(blocksize);
110         if (!buffer)
111                 return -ENOMEM;
112         for (offset = 0; offset < num_bytes; offset += blocksize) {
113                 ret = read_disk_extent(root, disk_bytenr + offset,
114                                         blocksize, buffer);
115                 if (ret)
116                         break;
117                 ret = btrfs_csum_file_block(trans,
118                                             root->fs_info->csum_root,
119                                             disk_bytenr + num_bytes,
120                                             disk_bytenr + offset,
121                                             buffer, blocksize);
122                 if (ret)
123                         break;
124         }
125         free(buffer);
126         return ret;
127 }
128
129 static int create_image_file_range(struct btrfs_trans_handle *trans,
130                                       struct btrfs_root *root,
131                                       struct cache_tree *used,
132                                       struct btrfs_inode_item *inode,
133                                       u64 ino, u64 bytenr, u64 *ret_len,
134                                       int datacsum)
135 {
136         struct cache_extent *cache;
137         struct btrfs_block_group_cache *bg_cache;
138         u64 len = *ret_len;
139         u64 disk_bytenr;
140         int i;
141         int ret;
142
143         if (bytenr != round_down(bytenr, root->sectorsize)) {
144                 error("bytenr not sectorsize aligned: %llu",
145                                 (unsigned long long)bytenr);
146                 return -EINVAL;
147         }
148         if (len != round_down(len, root->sectorsize)) {
149                 error("length not sectorsize aligned: %llu",
150                                 (unsigned long long)len);
151                 return -EINVAL;
152         }
153         len = min_t(u64, len, BTRFS_MAX_EXTENT_SIZE);
154
155         /*
156          * Skip sb ranges first
157          * [0, 1M), [sb_offset(1), +64K), [sb_offset(2), +64K].
158          *
159          * Or we will insert a hole into current image file, and later
160          * migrate block will fail as there is already a file extent.
161          */
162         if (bytenr < 1024 * 1024) {
163                 *ret_len = 1024 * 1024 - bytenr;
164                 return 0;
165         }
166         for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
167                 u64 cur = btrfs_sb_offset(i);
168
169                 if (bytenr >= cur && bytenr < cur + BTRFS_STRIPE_LEN) {
170                         *ret_len = cur + BTRFS_STRIPE_LEN - bytenr;
171                         return 0;
172                 }
173         }
174         for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
175                 u64 cur = btrfs_sb_offset(i);
176
177                 /*
178                  *      |--reserved--|
179                  * |----range-------|
180                  * May still need to go through file extent inserts
181                  */
182                 if (bytenr < cur && bytenr + len >= cur) {
183                         len = min_t(u64, len, cur - bytenr);
184                         break;
185                 }
186                 /*
187                  * |--reserved--|
188                  *      |---range---|
189                  * Drop out, no need to insert anything
190                  */
191                 if (bytenr >= cur && bytenr < cur + BTRFS_STRIPE_LEN) {
192                         *ret_len = cur + BTRFS_STRIPE_LEN - bytenr;
193                         return 0;
194                 }
195         }
196
197         cache = search_cache_extent(used, bytenr);
198         if (cache) {
199                 if (cache->start <= bytenr) {
200                         /*
201                          * |///////Used///////|
202                          *      |<--insert--->|
203                          *      bytenr
204                          */
205                         len = min_t(u64, len, cache->start + cache->size -
206                                     bytenr);
207                         disk_bytenr = bytenr;
208                 } else {
209                         /*
210                          *              |//Used//|
211                          *  |<-insert-->|
212                          *  bytenr
213                          */
214                         len = min(len, cache->start - bytenr);
215                         disk_bytenr = 0;
216                         datacsum = 0;
217                 }
218         } else {
219                 /*
220                  * |//Used//|           |EOF
221                  *          |<-insert-->|
222                  *          bytenr
223                  */
224                 disk_bytenr = 0;
225                 datacsum = 0;
226         }
227
228         if (disk_bytenr) {
229                 /* Check if the range is in a data block group */
230                 bg_cache = btrfs_lookup_block_group(root->fs_info, bytenr);
231                 if (!bg_cache)
232                         return -ENOENT;
233                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
234                         return -EINVAL;
235
236                 /* The extent should never cross block group boundary */
237                 len = min_t(u64, len, bg_cache->key.objectid +
238                             bg_cache->key.offset - bytenr);
239         }
240
241         if (len != round_down(len, root->sectorsize)) {
242                 error("remaining length not sectorsize aligned: %llu",
243                                 (unsigned long long)len);
244                 return -EINVAL;
245         }
246         ret = btrfs_record_file_extent(trans, root, ino, inode, bytenr,
247                                        disk_bytenr, len);
248         if (ret < 0)
249                 return ret;
250
251         if (datacsum)
252                 ret = csum_disk_extent(trans, root, bytenr, len);
253         *ret_len = len;
254         return ret;
255 }
256
257 /*
258  * Relocate old fs data in one reserved ranges
259  *
260  * Since all old fs data in reserved range is not covered by any chunk nor
261  * data extent, we don't need to handle any reference but add new
262  * extent/reference, which makes codes more clear
263  */
264 static int migrate_one_reserved_range(struct btrfs_trans_handle *trans,
265                                       struct btrfs_root *root,
266                                       struct cache_tree *used,
267                                       struct btrfs_inode_item *inode, int fd,
268                                       u64 ino, u64 start, u64 len, int datacsum)
269 {
270         u64 cur_off = start;
271         u64 cur_len = len;
272         u64 hole_start = start;
273         u64 hole_len;
274         struct cache_extent *cache;
275         struct btrfs_key key;
276         struct extent_buffer *eb;
277         int ret = 0;
278
279         while (cur_off < start + len) {
280                 cache = lookup_cache_extent(used, cur_off, cur_len);
281                 if (!cache)
282                         break;
283                 cur_off = max(cache->start, cur_off);
284                 cur_len = min(cache->start + cache->size, start + len) -
285                           cur_off;
286                 BUG_ON(cur_len < root->sectorsize);
287
288                 /* reserve extent for the data */
289                 ret = btrfs_reserve_extent(trans, root, cur_len, 0, 0, (u64)-1,
290                                            &key, 1);
291                 if (ret < 0)
292                         break;
293
294                 eb = malloc(sizeof(*eb) + cur_len);
295                 if (!eb) {
296                         ret = -ENOMEM;
297                         break;
298                 }
299
300                 ret = pread(fd, eb->data, cur_len, cur_off);
301                 if (ret < cur_len) {
302                         ret = (ret < 0 ? ret : -EIO);
303                         free(eb);
304                         break;
305                 }
306                 eb->start = key.objectid;
307                 eb->len = key.offset;
308
309                 /* Write the data */
310                 ret = write_and_map_eb(trans, root, eb);
311                 free(eb);
312                 if (ret < 0)
313                         break;
314
315                 /* Now handle extent item and file extent things */
316                 ret = btrfs_record_file_extent(trans, root, ino, inode, cur_off,
317                                                key.objectid, key.offset);
318                 if (ret < 0)
319                         break;
320                 /* Finally, insert csum items */
321                 if (datacsum)
322                         ret = csum_disk_extent(trans, root, key.objectid,
323                                                key.offset);
324
325                 /* Don't forget to insert hole */
326                 hole_len = cur_off - hole_start;
327                 if (hole_len) {
328                         ret = btrfs_record_file_extent(trans, root, ino, inode,
329                                         hole_start, 0, hole_len);
330                         if (ret < 0)
331                                 break;
332                 }
333
334                 cur_off += key.offset;
335                 hole_start = cur_off;
336                 cur_len = start + len - cur_off;
337         }
338         /* Last hole */
339         if (start + len - hole_start > 0)
340                 ret = btrfs_record_file_extent(trans, root, ino, inode,
341                                 hole_start, 0, start + len - hole_start);
342         return ret;
343 }
344
345 /*
346  * Relocate the used ext2 data in reserved ranges
347  * [0,1M)
348  * [btrfs_sb_offset(1), +BTRFS_STRIPE_LEN)
349  * [btrfs_sb_offset(2), +BTRFS_STRIPE_LEN)
350  */
351 static int migrate_reserved_ranges(struct btrfs_trans_handle *trans,
352                                    struct btrfs_root *root,
353                                    struct cache_tree *used,
354                                    struct btrfs_inode_item *inode, int fd,
355                                    u64 ino, u64 total_bytes, int datacsum)
356 {
357         u64 cur_off;
358         u64 cur_len;
359         int ret = 0;
360
361         /* 0 ~ 1M */
362         cur_off = 0;
363         cur_len = 1024 * 1024;
364         ret = migrate_one_reserved_range(trans, root, used, inode, fd, ino,
365                                          cur_off, cur_len, datacsum);
366         if (ret < 0)
367                 return ret;
368
369         /* second sb(fisrt sb is included in 0~1M) */
370         cur_off = btrfs_sb_offset(1);
371         cur_len = min(total_bytes, cur_off + BTRFS_STRIPE_LEN) - cur_off;
372         if (cur_off > total_bytes)
373                 return ret;
374         ret = migrate_one_reserved_range(trans, root, used, inode, fd, ino,
375                                          cur_off, cur_len, datacsum);
376         if (ret < 0)
377                 return ret;
378
379         /* Last sb */
380         cur_off = btrfs_sb_offset(2);
381         cur_len = min(total_bytes, cur_off + BTRFS_STRIPE_LEN) - cur_off;
382         if (cur_off > total_bytes)
383                 return ret;
384         ret = migrate_one_reserved_range(trans, root, used, inode, fd, ino,
385                                          cur_off, cur_len, datacsum);
386         return ret;
387 }
388
389 /*
390  * Helper for expand and merge extent_cache for wipe_one_reserved_range() to
391  * handle wiping a range that exists in cache.
392  */
393 static int _expand_extent_cache(struct cache_tree *tree,
394                                 struct cache_extent *entry,
395                                 u64 min_stripe_size, int backward)
396 {
397         struct cache_extent *ce;
398         int diff;
399
400         if (entry->size >= min_stripe_size)
401                 return 0;
402         diff = min_stripe_size - entry->size;
403
404         if (backward) {
405                 ce = prev_cache_extent(entry);
406                 if (!ce)
407                         goto expand_back;
408                 if (ce->start + ce->size >= entry->start - diff) {
409                         /* Directly merge with previous extent */
410                         ce->size = entry->start + entry->size - ce->start;
411                         remove_cache_extent(tree, entry);
412                         free(entry);
413                         return 0;
414                 }
415 expand_back:
416                 /* No overlap, normal extent */
417                 if (entry->start < diff) {
418                         error("cannot find space for data chunk layout");
419                         return -ENOSPC;
420                 }
421                 entry->start -= diff;
422                 entry->size += diff;
423                 return 0;
424         }
425         ce = next_cache_extent(entry);
426         if (!ce)
427                 goto expand_after;
428         if (entry->start + entry->size + diff >= ce->start) {
429                 /* Directly merge with next extent */
430                 entry->size = ce->start + ce->size - entry->start;
431                 remove_cache_extent(tree, ce);
432                 free(ce);
433                 return 0;
434         }
435 expand_after:
436         entry->size += diff;
437         return 0;
438 }
439
440 /*
441  * Remove one reserve range from given cache tree
442  * if min_stripe_size is non-zero, it will ensure for split case,
443  * all its split cache extent is no smaller than @min_strip_size / 2.
444  */
445 static int wipe_one_reserved_range(struct cache_tree *tree,
446                                    u64 start, u64 len, u64 min_stripe_size,
447                                    int ensure_size)
448 {
449         struct cache_extent *cache;
450         int ret;
451
452         BUG_ON(ensure_size && min_stripe_size == 0);
453         /*
454          * The logical here is simplified to handle special cases only
455          * So we don't need to consider merge case for ensure_size
456          */
457         BUG_ON(min_stripe_size && (min_stripe_size < len * 2 ||
458                min_stripe_size / 2 < BTRFS_STRIPE_LEN));
459
460         /* Also, wipe range should already be aligned */
461         BUG_ON(start != round_down(start, BTRFS_STRIPE_LEN) ||
462                start + len != round_up(start + len, BTRFS_STRIPE_LEN));
463
464         min_stripe_size /= 2;
465
466         cache = lookup_cache_extent(tree, start, len);
467         if (!cache)
468                 return 0;
469
470         if (start <= cache->start) {
471                 /*
472                  *      |--------cache---------|
473                  * |-wipe-|
474                  */
475                 BUG_ON(start + len <= cache->start);
476
477                 /*
478                  * The wipe size is smaller than min_stripe_size / 2,
479                  * so the result length should still meet min_stripe_size
480                  * And no need to do alignment
481                  */
482                 cache->size -= (start + len - cache->start);
483                 if (cache->size == 0) {
484                         remove_cache_extent(tree, cache);
485                         free(cache);
486                         return 0;
487                 }
488
489                 BUG_ON(ensure_size && cache->size < min_stripe_size);
490
491                 cache->start = start + len;
492                 return 0;
493         } else if (start > cache->start && start + len < cache->start +
494                    cache->size) {
495                 /*
496                  * |-------cache-----|
497                  *      |-wipe-|
498                  */
499                 u64 old_start = cache->start;
500                 u64 old_len = cache->size;
501                 u64 insert_start = start + len;
502                 u64 insert_len;
503
504                 cache->size = start - cache->start;
505                 /* Expand the leading half part if needed */
506                 if (ensure_size && cache->size < min_stripe_size) {
507                         ret = _expand_extent_cache(tree, cache,
508                                         min_stripe_size, 1);
509                         if (ret < 0)
510                                 return ret;
511                 }
512
513                 /* And insert the new one */
514                 insert_len = old_start + old_len - start - len;
515                 ret = add_merge_cache_extent(tree, insert_start, insert_len);
516                 if (ret < 0)
517                         return ret;
518
519                 /* Expand the last half part if needed */
520                 if (ensure_size && insert_len < min_stripe_size) {
521                         cache = lookup_cache_extent(tree, insert_start,
522                                                     insert_len);
523                         if (!cache || cache->start != insert_start ||
524                             cache->size != insert_len)
525                                 return -ENOENT;
526                         ret = _expand_extent_cache(tree, cache,
527                                         min_stripe_size, 0);
528                 }
529
530                 return ret;
531         }
532         /*
533          * |----cache-----|
534          *              |--wipe-|
535          * Wipe len should be small enough and no need to expand the
536          * remaining extent
537          */
538         cache->size = start - cache->start;
539         BUG_ON(ensure_size && cache->size < min_stripe_size);
540         return 0;
541 }
542
543 /*
544  * Remove reserved ranges from given cache_tree
545  *
546  * It will remove the following ranges
547  * 1) 0~1M
548  * 2) 2nd superblock, +64K (make sure chunks are 64K aligned)
549  * 3) 3rd superblock, +64K
550  *
551  * @min_stripe must be given for safety check
552  * and if @ensure_size is given, it will ensure affected cache_extent will be
553  * larger than min_stripe_size
554  */
555 static int wipe_reserved_ranges(struct cache_tree *tree, u64 min_stripe_size,
556                                 int ensure_size)
557 {
558         int ret;
559
560         ret = wipe_one_reserved_range(tree, 0, 1024 * 1024, min_stripe_size,
561                                       ensure_size);
562         if (ret < 0)
563                 return ret;
564         ret = wipe_one_reserved_range(tree, btrfs_sb_offset(1),
565                         BTRFS_STRIPE_LEN, min_stripe_size, ensure_size);
566         if (ret < 0)
567                 return ret;
568         ret = wipe_one_reserved_range(tree, btrfs_sb_offset(2),
569                         BTRFS_STRIPE_LEN, min_stripe_size, ensure_size);
570         return ret;
571 }
572
573 static int calculate_available_space(struct btrfs_convert_context *cctx)
574 {
575         struct cache_tree *used = &cctx->used;
576         struct cache_tree *data_chunks = &cctx->data_chunks;
577         struct cache_tree *free = &cctx->free;
578         struct cache_extent *cache;
579         u64 cur_off = 0;
580         /*
581          * Twice the minimal chunk size, to allow later wipe_reserved_ranges()
582          * works without need to consider overlap
583          */
584         u64 min_stripe_size = 2 * 16 * 1024 * 1024;
585         int ret;
586
587         /* Calculate data_chunks */
588         for (cache = first_cache_extent(used); cache;
589              cache = next_cache_extent(cache)) {
590                 u64 cur_len;
591
592                 if (cache->start + cache->size < cur_off)
593                         continue;
594                 if (cache->start > cur_off + min_stripe_size)
595                         cur_off = cache->start;
596                 cur_len = max(cache->start + cache->size - cur_off,
597                               min_stripe_size);
598                 ret = add_merge_cache_extent(data_chunks, cur_off, cur_len);
599                 if (ret < 0)
600                         goto out;
601                 cur_off += cur_len;
602         }
603         /*
604          * remove reserved ranges, so we won't ever bother relocating an old
605          * filesystem extent to other place.
606          */
607         ret = wipe_reserved_ranges(data_chunks, min_stripe_size, 1);
608         if (ret < 0)
609                 goto out;
610
611         cur_off = 0;
612         /*
613          * Calculate free space
614          * Always round up the start bytenr, to avoid metadata extent corss
615          * stripe boundary, as later mkfs_convert() won't have all the extent
616          * allocation check
617          */
618         for (cache = first_cache_extent(data_chunks); cache;
619              cache = next_cache_extent(cache)) {
620                 if (cache->start < cur_off)
621                         continue;
622                 if (cache->start > cur_off) {
623                         u64 insert_start;
624                         u64 len;
625
626                         len = cache->start - round_up(cur_off,
627                                                       BTRFS_STRIPE_LEN);
628                         insert_start = round_up(cur_off, BTRFS_STRIPE_LEN);
629
630                         ret = add_merge_cache_extent(free, insert_start, len);
631                         if (ret < 0)
632                                 goto out;
633                 }
634                 cur_off = cache->start + cache->size;
635         }
636         /* Don't forget the last range */
637         if (cctx->total_bytes > cur_off) {
638                 u64 len = cctx->total_bytes - cur_off;
639                 u64 insert_start;
640
641                 insert_start = round_up(cur_off, BTRFS_STRIPE_LEN);
642
643                 ret = add_merge_cache_extent(free, insert_start, len);
644                 if (ret < 0)
645                         goto out;
646         }
647
648         /* Remove reserved bytes */
649         ret = wipe_reserved_ranges(free, min_stripe_size, 0);
650 out:
651         return ret;
652 }
653
654 /*
655  * Read used space, and since we have the used space,
656  * calcuate data_chunks and free for later mkfs
657  */
658 static int convert_read_used_space(struct btrfs_convert_context *cctx)
659 {
660         int ret;
661
662         ret = cctx->convert_ops->read_used_space(cctx);
663         if (ret)
664                 return ret;
665
666         ret = calculate_available_space(cctx);
667         return ret;
668 }
669
670 /*
671  * Create the fs image file of old filesystem.
672  *
673  * This is completely fs independent as we have cctx->used, only
674  * need to create file extents pointing to all the positions.
675  */
676 static int create_image(struct btrfs_root *root,
677                            struct btrfs_mkfs_config *cfg,
678                            struct btrfs_convert_context *cctx, int fd,
679                            u64 size, char *name, int datacsum)
680 {
681         struct btrfs_inode_item buf;
682         struct btrfs_trans_handle *trans;
683         struct btrfs_path path;
684         struct btrfs_key key;
685         struct cache_extent *cache;
686         struct cache_tree used_tmp;
687         u64 cur;
688         u64 ino;
689         u64 flags = BTRFS_INODE_READONLY;
690         int ret;
691
692         if (!datacsum)
693                 flags |= BTRFS_INODE_NODATASUM;
694
695         trans = btrfs_start_transaction(root, 1);
696         if (!trans)
697                 return -ENOMEM;
698
699         cache_tree_init(&used_tmp);
700         btrfs_init_path(&path);
701
702         ret = btrfs_find_free_objectid(trans, root, BTRFS_FIRST_FREE_OBJECTID,
703                                        &ino);
704         if (ret < 0)
705                 goto out;
706         ret = btrfs_new_inode(trans, root, ino, 0400 | S_IFREG);
707         if (ret < 0)
708                 goto out;
709         ret = btrfs_change_inode_flags(trans, root, ino, flags);
710         if (ret < 0)
711                 goto out;
712         ret = btrfs_add_link(trans, root, ino, BTRFS_FIRST_FREE_OBJECTID, name,
713                              strlen(name), BTRFS_FT_REG_FILE, NULL, 1);
714         if (ret < 0)
715                 goto out;
716
717         key.objectid = ino;
718         key.type = BTRFS_INODE_ITEM_KEY;
719         key.offset = 0;
720
721         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
722         if (ret) {
723                 ret = (ret > 0 ? -ENOENT : ret);
724                 goto out;
725         }
726         read_extent_buffer(path.nodes[0], &buf,
727                         btrfs_item_ptr_offset(path.nodes[0], path.slots[0]),
728                         sizeof(buf));
729         btrfs_release_path(&path);
730
731         /*
732          * Create a new used space cache, which doesn't contain the reserved
733          * range
734          */
735         for (cache = first_cache_extent(&cctx->used); cache;
736              cache = next_cache_extent(cache)) {
737                 ret = add_cache_extent(&used_tmp, cache->start, cache->size);
738                 if (ret < 0)
739                         goto out;
740         }
741         ret = wipe_reserved_ranges(&used_tmp, 0, 0);
742         if (ret < 0)
743                 goto out;
744
745         /*
746          * Start from 1M, as 0~1M is reserved, and create_image_file_range()
747          * can't handle bytenr 0(will consider it as a hole)
748          */
749         cur = 1024 * 1024;
750         while (cur < size) {
751                 u64 len = size - cur;
752
753                 ret = create_image_file_range(trans, root, &used_tmp,
754                                                 &buf, ino, cur, &len, datacsum);
755                 if (ret < 0)
756                         goto out;
757                 cur += len;
758         }
759         /* Handle the reserved ranges */
760         ret = migrate_reserved_ranges(trans, root, &cctx->used, &buf, fd, ino,
761                                       cfg->num_bytes, datacsum);
762
763
764         key.objectid = ino;
765         key.type = BTRFS_INODE_ITEM_KEY;
766         key.offset = 0;
767         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
768         if (ret) {
769                 ret = (ret > 0 ? -ENOENT : ret);
770                 goto out;
771         }
772         btrfs_set_stack_inode_size(&buf, cfg->num_bytes);
773         write_extent_buffer(path.nodes[0], &buf,
774                         btrfs_item_ptr_offset(path.nodes[0], path.slots[0]),
775                         sizeof(buf));
776 out:
777         free_extent_cache_tree(&used_tmp);
778         btrfs_release_path(&path);
779         btrfs_commit_transaction(trans, root);
780         return ret;
781 }
782
783 static struct btrfs_root* link_subvol(struct btrfs_root *root,
784                 const char *base, u64 root_objectid)
785 {
786         struct btrfs_trans_handle *trans;
787         struct btrfs_fs_info *fs_info = root->fs_info;
788         struct btrfs_root *tree_root = fs_info->tree_root;
789         struct btrfs_root *new_root = NULL;
790         struct btrfs_path path;
791         struct btrfs_inode_item *inode_item;
792         struct extent_buffer *leaf;
793         struct btrfs_key key;
794         u64 dirid = btrfs_root_dirid(&root->root_item);
795         u64 index = 2;
796         char buf[BTRFS_NAME_LEN + 1]; /* for snprintf null */
797         int len;
798         int i;
799         int ret;
800
801         len = strlen(base);
802         if (len == 0 || len > BTRFS_NAME_LEN)
803                 return NULL;
804
805         btrfs_init_path(&path);
806         key.objectid = dirid;
807         key.type = BTRFS_DIR_INDEX_KEY;
808         key.offset = (u64)-1;
809
810         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
811         if (ret <= 0) {
812                 error("search for DIR_INDEX dirid %llu failed: %d",
813                                 (unsigned long long)dirid, ret);
814                 goto fail;
815         }
816
817         if (path.slots[0] > 0) {
818                 path.slots[0]--;
819                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
820                 if (key.objectid == dirid && key.type == BTRFS_DIR_INDEX_KEY)
821                         index = key.offset + 1;
822         }
823         btrfs_release_path(&path);
824
825         trans = btrfs_start_transaction(root, 1);
826         if (!trans) {
827                 error("unable to start transaction");
828                 goto fail;
829         }
830
831         key.objectid = dirid;
832         key.offset = 0;
833         key.type =  BTRFS_INODE_ITEM_KEY;
834
835         ret = btrfs_lookup_inode(trans, root, &path, &key, 1);
836         if (ret) {
837                 error("search for INODE_ITEM %llu failed: %d",
838                                 (unsigned long long)dirid, ret);
839                 goto fail;
840         }
841         leaf = path.nodes[0];
842         inode_item = btrfs_item_ptr(leaf, path.slots[0],
843                                     struct btrfs_inode_item);
844
845         key.objectid = root_objectid;
846         key.offset = (u64)-1;
847         key.type = BTRFS_ROOT_ITEM_KEY;
848
849         memcpy(buf, base, len);
850         for (i = 0; i < 1024; i++) {
851                 ret = btrfs_insert_dir_item(trans, root, buf, len,
852                                             dirid, &key, BTRFS_FT_DIR, index);
853                 if (ret != -EEXIST)
854                         break;
855                 len = snprintf(buf, ARRAY_SIZE(buf), "%s%d", base, i);
856                 if (len < 1 || len > BTRFS_NAME_LEN) {
857                         ret = -EINVAL;
858                         break;
859                 }
860         }
861         if (ret)
862                 goto fail;
863
864         btrfs_set_inode_size(leaf, inode_item, len * 2 +
865                              btrfs_inode_size(leaf, inode_item));
866         btrfs_mark_buffer_dirty(leaf);
867         btrfs_release_path(&path);
868
869         /* add the backref first */
870         ret = btrfs_add_root_ref(trans, tree_root, root_objectid,
871                                  BTRFS_ROOT_BACKREF_KEY,
872                                  root->root_key.objectid,
873                                  dirid, index, buf, len);
874         if (ret) {
875                 error("unable to add root backref for %llu: %d",
876                                 root->root_key.objectid, ret);
877                 goto fail;
878         }
879
880         /* now add the forward ref */
881         ret = btrfs_add_root_ref(trans, tree_root, root->root_key.objectid,
882                                  BTRFS_ROOT_REF_KEY, root_objectid,
883                                  dirid, index, buf, len);
884         if (ret) {
885                 error("unable to add root ref for %llu: %d",
886                                 root->root_key.objectid, ret);
887                 goto fail;
888         }
889
890         ret = btrfs_commit_transaction(trans, root);
891         if (ret) {
892                 error("transaction commit failed: %d", ret);
893                 goto fail;
894         }
895
896         new_root = btrfs_read_fs_root(fs_info, &key);
897         if (IS_ERR(new_root)) {
898                 error("unable to fs read root: %lu", PTR_ERR(new_root));
899                 new_root = NULL;
900         }
901 fail:
902         btrfs_init_path(&path);
903         return new_root;
904 }
905
906 static int create_subvol(struct btrfs_trans_handle *trans,
907                          struct btrfs_root *root, u64 root_objectid)
908 {
909         struct extent_buffer *tmp;
910         struct btrfs_root *new_root;
911         struct btrfs_key key;
912         struct btrfs_root_item root_item;
913         int ret;
914
915         ret = btrfs_copy_root(trans, root, root->node, &tmp,
916                               root_objectid);
917         if (ret)
918                 return ret;
919
920         memcpy(&root_item, &root->root_item, sizeof(root_item));
921         btrfs_set_root_bytenr(&root_item, tmp->start);
922         btrfs_set_root_level(&root_item, btrfs_header_level(tmp));
923         btrfs_set_root_generation(&root_item, trans->transid);
924         free_extent_buffer(tmp);
925
926         key.objectid = root_objectid;
927         key.type = BTRFS_ROOT_ITEM_KEY;
928         key.offset = trans->transid;
929         ret = btrfs_insert_root(trans, root->fs_info->tree_root,
930                                 &key, &root_item);
931
932         key.offset = (u64)-1;
933         new_root = btrfs_read_fs_root(root->fs_info, &key);
934         if (!new_root || IS_ERR(new_root)) {
935                 error("unable to fs read root: %lu", PTR_ERR(new_root));
936                 return PTR_ERR(new_root);
937         }
938
939         ret = btrfs_make_root_dir(trans, new_root, BTRFS_FIRST_FREE_OBJECTID);
940
941         return ret;
942 }
943
944 /*
945  * New make_btrfs() has handle system and meta chunks quite well.
946  * So only need to add remaining data chunks.
947  */
948 static int make_convert_data_block_groups(struct btrfs_trans_handle *trans,
949                                           struct btrfs_fs_info *fs_info,
950                                           struct btrfs_mkfs_config *cfg,
951                                           struct btrfs_convert_context *cctx)
952 {
953         struct btrfs_root *extent_root = fs_info->extent_root;
954         struct cache_tree *data_chunks = &cctx->data_chunks;
955         struct cache_extent *cache;
956         u64 max_chunk_size;
957         int ret = 0;
958
959         /*
960          * Don't create data chunk over 10% of the convert device
961          * And for single chunk, don't create chunk larger than 1G.
962          */
963         max_chunk_size = cfg->num_bytes / 10;
964         max_chunk_size = min((u64)(1024 * 1024 * 1024), max_chunk_size);
965         max_chunk_size = round_down(max_chunk_size, extent_root->sectorsize);
966
967         for (cache = first_cache_extent(data_chunks); cache;
968              cache = next_cache_extent(cache)) {
969                 u64 cur = cache->start;
970
971                 while (cur < cache->start + cache->size) {
972                         u64 len;
973                         u64 cur_backup = cur;
974
975                         len = min(max_chunk_size,
976                                   cache->start + cache->size - cur);
977                         ret = btrfs_alloc_data_chunk(trans, extent_root,
978                                         &cur_backup, len,
979                                         BTRFS_BLOCK_GROUP_DATA, 1);
980                         if (ret < 0)
981                                 break;
982                         ret = btrfs_make_block_group(trans, extent_root, 0,
983                                         BTRFS_BLOCK_GROUP_DATA,
984                                         BTRFS_FIRST_CHUNK_TREE_OBJECTID,
985                                         cur, len);
986                         if (ret < 0)
987                                 break;
988                         cur += len;
989                 }
990         }
991         return ret;
992 }
993
994 /*
995  * Init the temp btrfs to a operational status.
996  *
997  * It will fix the extent usage accounting(XXX: Do we really need?) and
998  * insert needed data chunks, to ensure all old fs data extents are covered
999  * by DATA chunks, preventing wrong chunks are allocated.
1000  *
1001  * And also create convert image subvolume and relocation tree.
1002  * (XXX: Not need again?)
1003  * But the convert image subvolume is *NOT* linked to fs tree yet.
1004  */
1005 static int init_btrfs(struct btrfs_mkfs_config *cfg, struct btrfs_root *root,
1006                          struct btrfs_convert_context *cctx, int datacsum,
1007                          int packing, int noxattr)
1008 {
1009         struct btrfs_key location;
1010         struct btrfs_trans_handle *trans;
1011         struct btrfs_fs_info *fs_info = root->fs_info;
1012         int ret;
1013
1014         /*
1015          * Don't alloc any metadata/system chunk, as we don't want
1016          * any meta/sys chunk allcated before all data chunks are inserted.
1017          * Or we screw up the chunk layout just like the old implement.
1018          */
1019         fs_info->avoid_sys_chunk_alloc = 1;
1020         fs_info->avoid_meta_chunk_alloc = 1;
1021         trans = btrfs_start_transaction(root, 1);
1022         if (!trans) {
1023                 error("unable to start transaction");
1024                 ret = -EINVAL;
1025                 goto err;
1026         }
1027         ret = btrfs_fix_block_accounting(trans, root);
1028         if (ret)
1029                 goto err;
1030         ret = make_convert_data_block_groups(trans, fs_info, cfg, cctx);
1031         if (ret)
1032                 goto err;
1033         ret = btrfs_make_root_dir(trans, fs_info->tree_root,
1034                                   BTRFS_ROOT_TREE_DIR_OBJECTID);
1035         if (ret)
1036                 goto err;
1037         memcpy(&location, &root->root_key, sizeof(location));
1038         location.offset = (u64)-1;
1039         ret = btrfs_insert_dir_item(trans, fs_info->tree_root, "default", 7,
1040                                 btrfs_super_root_dir(fs_info->super_copy),
1041                                 &location, BTRFS_FT_DIR, 0);
1042         if (ret)
1043                 goto err;
1044         ret = btrfs_insert_inode_ref(trans, fs_info->tree_root, "default", 7,
1045                                 location.objectid,
1046                                 btrfs_super_root_dir(fs_info->super_copy), 0);
1047         if (ret)
1048                 goto err;
1049         btrfs_set_root_dirid(&fs_info->fs_root->root_item,
1050                              BTRFS_FIRST_FREE_OBJECTID);
1051
1052         /* subvol for fs image file */
1053         ret = create_subvol(trans, root, CONV_IMAGE_SUBVOL_OBJECTID);
1054         if (ret < 0) {
1055                 error("failed to create subvolume image root: %d", ret);
1056                 goto err;
1057         }
1058         /* subvol for data relocation tree */
1059         ret = create_subvol(trans, root, BTRFS_DATA_RELOC_TREE_OBJECTID);
1060         if (ret < 0) {
1061                 error("failed to create DATA_RELOC root: %d", ret);
1062                 goto err;
1063         }
1064
1065         ret = btrfs_commit_transaction(trans, root);
1066         fs_info->avoid_sys_chunk_alloc = 0;
1067         fs_info->avoid_meta_chunk_alloc = 0;
1068 err:
1069         return ret;
1070 }
1071
1072 /*
1073  * Migrate super block to its default position and zero 0 ~ 16k
1074  */
1075 static int migrate_super_block(int fd, u64 old_bytenr)
1076 {
1077         int ret;
1078         struct extent_buffer *buf;
1079         struct btrfs_super_block *super;
1080         u32 len;
1081         u32 bytenr;
1082
1083         buf = malloc(sizeof(*buf) + BTRFS_SUPER_INFO_SIZE);
1084         if (!buf)
1085                 return -ENOMEM;
1086
1087         buf->len = BTRFS_SUPER_INFO_SIZE;
1088         ret = pread(fd, buf->data, BTRFS_SUPER_INFO_SIZE, old_bytenr);
1089         if (ret != BTRFS_SUPER_INFO_SIZE)
1090                 goto fail;
1091
1092         super = (struct btrfs_super_block *)buf->data;
1093         BUG_ON(btrfs_super_bytenr(super) != old_bytenr);
1094         btrfs_set_super_bytenr(super, BTRFS_SUPER_INFO_OFFSET);
1095
1096         csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
1097         ret = pwrite(fd, buf->data, BTRFS_SUPER_INFO_SIZE,
1098                 BTRFS_SUPER_INFO_OFFSET);
1099         if (ret != BTRFS_SUPER_INFO_SIZE)
1100                 goto fail;
1101
1102         ret = fsync(fd);
1103         if (ret)
1104                 goto fail;
1105
1106         memset(buf->data, 0, BTRFS_SUPER_INFO_SIZE);
1107         for (bytenr = 0; bytenr < BTRFS_SUPER_INFO_OFFSET; ) {
1108                 len = BTRFS_SUPER_INFO_OFFSET - bytenr;
1109                 if (len > BTRFS_SUPER_INFO_SIZE)
1110                         len = BTRFS_SUPER_INFO_SIZE;
1111                 ret = pwrite(fd, buf->data, len, bytenr);
1112                 if (ret != len) {
1113                         fprintf(stderr, "unable to zero fill device\n");
1114                         break;
1115                 }
1116                 bytenr += len;
1117         }
1118         ret = 0;
1119         fsync(fd);
1120 fail:
1121         free(buf);
1122         if (ret > 0)
1123                 ret = -1;
1124         return ret;
1125 }
1126
1127 static int prepare_system_chunk_sb(struct btrfs_super_block *super)
1128 {
1129         struct btrfs_chunk *chunk;
1130         struct btrfs_disk_key *key;
1131         u32 sectorsize = btrfs_super_sectorsize(super);
1132
1133         key = (struct btrfs_disk_key *)(super->sys_chunk_array);
1134         chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
1135                                        sizeof(struct btrfs_disk_key));
1136
1137         btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
1138         btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
1139         btrfs_set_disk_key_offset(key, 0);
1140
1141         btrfs_set_stack_chunk_length(chunk, btrfs_super_total_bytes(super));
1142         btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
1143         btrfs_set_stack_chunk_stripe_len(chunk, BTRFS_STRIPE_LEN);
1144         btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
1145         btrfs_set_stack_chunk_io_align(chunk, sectorsize);
1146         btrfs_set_stack_chunk_io_width(chunk, sectorsize);
1147         btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
1148         btrfs_set_stack_chunk_num_stripes(chunk, 1);
1149         btrfs_set_stack_chunk_sub_stripes(chunk, 0);
1150         chunk->stripe.devid = super->dev_item.devid;
1151         btrfs_set_stack_stripe_offset(&chunk->stripe, 0);
1152         memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
1153         btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
1154         return 0;
1155 }
1156
1157 static int convert_open_fs(const char *devname,
1158                            struct btrfs_convert_context *cctx)
1159 {
1160         int i;
1161
1162         memset(cctx, 0, sizeof(*cctx));
1163
1164         for (i = 0; i < ARRAY_SIZE(convert_operations); i++) {
1165                 int ret = convert_operations[i]->open_fs(cctx, devname);
1166
1167                 if (ret == 0) {
1168                         cctx->convert_ops = convert_operations[i];
1169                         return ret;
1170                 }
1171         }
1172
1173         error("no file system found to convert");
1174         return -1;
1175 }
1176
1177 static int do_convert(const char *devname, int datacsum, int packing,
1178                 int noxattr, u32 nodesize, int copylabel, const char *fslabel,
1179                 int progress, u64 features)
1180 {
1181         int ret;
1182         int fd = -1;
1183         u32 blocksize;
1184         u64 total_bytes;
1185         struct btrfs_root *root;
1186         struct btrfs_root *image_root;
1187         struct btrfs_convert_context cctx;
1188         struct btrfs_key key;
1189         char *subvol_name = NULL;
1190         struct task_ctx ctx;
1191         char features_buf[64];
1192         struct btrfs_mkfs_config mkfs_cfg;
1193
1194         init_convert_context(&cctx);
1195         ret = convert_open_fs(devname, &cctx);
1196         if (ret)
1197                 goto fail;
1198         ret = convert_check_state(&cctx);
1199         if (ret)
1200                 warning(
1201                 "source filesystem is not clean, running filesystem check is recommended");
1202         ret = convert_read_used_space(&cctx);
1203         if (ret)
1204                 goto fail;
1205
1206         blocksize = cctx.blocksize;
1207         total_bytes = (u64)blocksize * (u64)cctx.block_count;
1208         if (blocksize < 4096) {
1209                 error("block size is too small: %u < 4096", blocksize);
1210                 goto fail;
1211         }
1212         if (btrfs_check_nodesize(nodesize, blocksize, features))
1213                 goto fail;
1214         fd = open(devname, O_RDWR);
1215         if (fd < 0) {
1216                 error("unable to open %s: %s", devname, strerror(errno));
1217                 goto fail;
1218         }
1219         btrfs_parse_features_to_string(features_buf, features);
1220         if (features == BTRFS_MKFS_DEFAULT_FEATURES)
1221                 strcat(features_buf, " (default)");
1222
1223         printf("create btrfs filesystem:\n");
1224         printf("\tblocksize: %u\n", blocksize);
1225         printf("\tnodesize:  %u\n", nodesize);
1226         printf("\tfeatures:  %s\n", features_buf);
1227
1228         mkfs_cfg.label = cctx.volume_name;
1229         mkfs_cfg.num_bytes = total_bytes;
1230         mkfs_cfg.nodesize = nodesize;
1231         mkfs_cfg.sectorsize = blocksize;
1232         mkfs_cfg.stripesize = blocksize;
1233         mkfs_cfg.features = features;
1234         /* New convert need these space */
1235         memset(mkfs_cfg.chunk_uuid, 0, BTRFS_UUID_UNPARSED_SIZE);
1236         memset(mkfs_cfg.fs_uuid, 0, BTRFS_UUID_UNPARSED_SIZE);
1237
1238         ret = make_convert_btrfs(fd, &mkfs_cfg, &cctx);
1239         if (ret) {
1240                 error("unable to create initial ctree: %s", strerror(-ret));
1241                 goto fail;
1242         }
1243
1244         root = open_ctree_fd(fd, devname, mkfs_cfg.super_bytenr,
1245                              OPEN_CTREE_WRITES | OPEN_CTREE_FS_PARTIAL);
1246         if (!root) {
1247                 error("unable to open ctree");
1248                 goto fail;
1249         }
1250         ret = init_btrfs(&mkfs_cfg, root, &cctx, datacsum, packing, noxattr);
1251         if (ret) {
1252                 error("unable to setup the root tree: %d", ret);
1253                 goto fail;
1254         }
1255
1256         printf("creating %s image file\n", cctx.convert_ops->name);
1257         ret = asprintf(&subvol_name, "%s_saved", cctx.convert_ops->name);
1258         if (ret < 0) {
1259                 error("memory allocation failure for subvolume name: %s_saved",
1260                         cctx.convert_ops->name);
1261                 goto fail;
1262         }
1263         key.objectid = CONV_IMAGE_SUBVOL_OBJECTID;
1264         key.offset = (u64)-1;
1265         key.type = BTRFS_ROOT_ITEM_KEY;
1266         image_root = btrfs_read_fs_root(root->fs_info, &key);
1267         if (!image_root) {
1268                 error("unable to create image subvolume");
1269                 goto fail;
1270         }
1271         ret = create_image(image_root, &mkfs_cfg, &cctx, fd,
1272                               mkfs_cfg.num_bytes, "image", datacsum);
1273         if (ret) {
1274                 error("failed to create %s/image: %d", subvol_name, ret);
1275                 goto fail;
1276         }
1277
1278         printf("creating btrfs metadata");
1279         ctx.max_copy_inodes = (cctx.inodes_count - cctx.free_inodes_count);
1280         ctx.cur_copy_inodes = 0;
1281
1282         if (progress) {
1283                 ctx.info = task_init(print_copied_inodes, after_copied_inodes,
1284                                      &ctx);
1285                 task_start(ctx.info);
1286         }
1287         ret = copy_inodes(&cctx, root, datacsum, packing, noxattr, &ctx);
1288         if (ret) {
1289                 error("error during copy_inodes %d", ret);
1290                 goto fail;
1291         }
1292         if (progress) {
1293                 task_stop(ctx.info);
1294                 task_deinit(ctx.info);
1295         }
1296
1297         image_root = link_subvol(root, subvol_name, CONV_IMAGE_SUBVOL_OBJECTID);
1298         if (!image_root) {
1299                 error("unable to link subvolume %s", subvol_name);
1300                 goto fail;
1301         }
1302
1303         free(subvol_name);
1304
1305         memset(root->fs_info->super_copy->label, 0, BTRFS_LABEL_SIZE);
1306         if (copylabel == 1) {
1307                 __strncpy_null(root->fs_info->super_copy->label,
1308                                 cctx.volume_name, BTRFS_LABEL_SIZE - 1);
1309                 printf("copy label '%s'\n", root->fs_info->super_copy->label);
1310         } else if (copylabel == -1) {
1311                 strcpy(root->fs_info->super_copy->label, fslabel);
1312                 printf("set label to '%s'\n", fslabel);
1313         }
1314
1315         ret = close_ctree(root);
1316         if (ret) {
1317                 error("close_ctree failed: %d", ret);
1318                 goto fail;
1319         }
1320         convert_close_fs(&cctx);
1321         clean_convert_context(&cctx);
1322
1323         /*
1324          * If this step succeed, we get a mountable btrfs. Otherwise
1325          * the source fs is left unchanged.
1326          */
1327         ret = migrate_super_block(fd, mkfs_cfg.super_bytenr);
1328         if (ret) {
1329                 error("unable to migrate super block: %d", ret);
1330                 goto fail;
1331         }
1332
1333         root = open_ctree_fd(fd, devname, 0,
1334                         OPEN_CTREE_WRITES | OPEN_CTREE_FS_PARTIAL);
1335         if (!root) {
1336                 error("unable to open ctree for finalization");
1337                 goto fail;
1338         }
1339         root->fs_info->finalize_on_close = 1;
1340         close_ctree(root);
1341         close(fd);
1342
1343         printf("conversion complete");
1344         return 0;
1345 fail:
1346         clean_convert_context(&cctx);
1347         if (fd != -1)
1348                 close(fd);
1349         warning(
1350 "an error occurred during conversion, filesystem is partially created but not finalized and not mountable");
1351         return -1;
1352 }
1353
1354 /*
1355  * Check if a non 1:1 mapped chunk can be rolled back.
1356  * For new convert, it's OK while for old convert it's not.
1357  */
1358 static int may_rollback_chunk(struct btrfs_fs_info *fs_info, u64 bytenr)
1359 {
1360         struct btrfs_block_group_cache *bg;
1361         struct btrfs_key key;
1362         struct btrfs_path path;
1363         struct btrfs_root *extent_root = fs_info->extent_root;
1364         u64 bg_start;
1365         u64 bg_end;
1366         int ret;
1367
1368         bg = btrfs_lookup_first_block_group(fs_info, bytenr);
1369         if (!bg)
1370                 return -ENOENT;
1371         bg_start = bg->key.objectid;
1372         bg_end = bg->key.objectid + bg->key.offset;
1373
1374         key.objectid = bg_end;
1375         key.type = BTRFS_METADATA_ITEM_KEY;
1376         key.offset = 0;
1377         btrfs_init_path(&path);
1378
1379         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
1380         if (ret < 0)
1381                 return ret;
1382
1383         while (1) {
1384                 struct btrfs_extent_item *ei;
1385
1386                 ret = btrfs_previous_extent_item(extent_root, &path, bg_start);
1387                 if (ret > 0) {
1388                         ret = 0;
1389                         break;
1390                 }
1391                 if (ret < 0)
1392                         break;
1393
1394                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
1395                 if (key.type == BTRFS_METADATA_ITEM_KEY)
1396                         continue;
1397                 /* Now it's EXTENT_ITEM_KEY only */
1398                 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
1399                                     struct btrfs_extent_item);
1400                 /*
1401                  * Found data extent, means this is old convert must follow 1:1
1402                  * mapping.
1403                  */
1404                 if (btrfs_extent_flags(path.nodes[0], ei)
1405                                 & BTRFS_EXTENT_FLAG_DATA) {
1406                         ret = -EINVAL;
1407                         break;
1408                 }
1409         }
1410         btrfs_release_path(&path);
1411         return ret;
1412 }
1413
1414 static int may_rollback(struct btrfs_root *root)
1415 {
1416         struct btrfs_fs_info *info = root->fs_info;
1417         struct btrfs_multi_bio *multi = NULL;
1418         u64 bytenr;
1419         u64 length;
1420         u64 physical;
1421         u64 total_bytes;
1422         int num_stripes;
1423         int ret;
1424
1425         if (btrfs_super_num_devices(info->super_copy) != 1)
1426                 goto fail;
1427
1428         bytenr = BTRFS_SUPER_INFO_OFFSET;
1429         total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
1430
1431         while (1) {
1432                 ret = btrfs_map_block(&info->mapping_tree, WRITE, bytenr,
1433                                       &length, &multi, 0, NULL);
1434                 if (ret) {
1435                         if (ret == -ENOENT) {
1436                                 /* removed block group at the tail */
1437                                 if (length == (u64)-1)
1438                                         break;
1439
1440                                 /* removed block group in the middle */
1441                                 goto next;
1442                         }
1443                         goto fail;
1444                 }
1445
1446                 num_stripes = multi->num_stripes;
1447                 physical = multi->stripes[0].physical;
1448                 free(multi);
1449
1450                 if (num_stripes != 1) {
1451                         error("num stripes for bytenr %llu is not 1", bytenr);
1452                         goto fail;
1453                 }
1454
1455                 /*
1456                  * Extra check for new convert, as metadata chunk from new
1457                  * convert is much more free than old convert, it doesn't need
1458                  * to do 1:1 mapping.
1459                  */
1460                 if (physical != bytenr) {
1461                         /*
1462                          * Check if it's a metadata chunk and has only metadata
1463                          * extent.
1464                          */
1465                         ret = may_rollback_chunk(info, bytenr);
1466                         if (ret < 0)
1467                                 goto fail;
1468                 }
1469 next:
1470                 bytenr += length;
1471                 if (bytenr >= total_bytes)
1472                         break;
1473         }
1474         return 0;
1475 fail:
1476         return -1;
1477 }
1478
1479 static int do_rollback(const char *devname)
1480 {
1481         int fd = -1;
1482         int ret;
1483         int i;
1484         struct btrfs_root *root;
1485         struct btrfs_root *image_root;
1486         struct btrfs_root *chunk_root;
1487         struct btrfs_dir_item *dir;
1488         struct btrfs_inode_item *inode;
1489         struct btrfs_file_extent_item *fi;
1490         struct btrfs_trans_handle *trans;
1491         struct extent_buffer *leaf;
1492         struct btrfs_block_group_cache *cache1;
1493         struct btrfs_block_group_cache *cache2;
1494         struct btrfs_key key;
1495         struct btrfs_path path;
1496         struct extent_io_tree io_tree;
1497         char *buf = NULL;
1498         char *name;
1499         u64 bytenr;
1500         u64 num_bytes;
1501         u64 root_dir;
1502         u64 objectid;
1503         u64 offset;
1504         u64 start;
1505         u64 end;
1506         u64 sb_bytenr;
1507         u64 first_free;
1508         u64 total_bytes;
1509         u32 sectorsize;
1510
1511         extent_io_tree_init(&io_tree);
1512
1513         fd = open(devname, O_RDWR);
1514         if (fd < 0) {
1515                 error("unable to open %s: %s", devname, strerror(errno));
1516                 goto fail;
1517         }
1518         root = open_ctree_fd(fd, devname, 0, OPEN_CTREE_WRITES);
1519         if (!root) {
1520                 error("unable to open ctree");
1521                 goto fail;
1522         }
1523         ret = may_rollback(root);
1524         if (ret < 0) {
1525                 error("unable to do rollback: %d", ret);
1526                 goto fail;
1527         }
1528
1529         sectorsize = root->sectorsize;
1530         buf = malloc(sectorsize);
1531         if (!buf) {
1532                 error("unable to allocate memory");
1533                 goto fail;
1534         }
1535
1536         btrfs_init_path(&path);
1537
1538         key.objectid = CONV_IMAGE_SUBVOL_OBJECTID;
1539         key.type = BTRFS_ROOT_BACKREF_KEY;
1540         key.offset = BTRFS_FS_TREE_OBJECTID;
1541         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path, 0,
1542                                 0);
1543         btrfs_release_path(&path);
1544         if (ret > 0) {
1545                 error("unable to convert ext2 image subvolume, is it deleted?");
1546                 goto fail;
1547         } else if (ret < 0) {
1548                 error("unable to open ext2_saved, id %llu: %s",
1549                         (unsigned long long)key.objectid, strerror(-ret));
1550                 goto fail;
1551         }
1552
1553         key.objectid = CONV_IMAGE_SUBVOL_OBJECTID;
1554         key.type = BTRFS_ROOT_ITEM_KEY;
1555         key.offset = (u64)-1;
1556         image_root = btrfs_read_fs_root(root->fs_info, &key);
1557         if (!image_root || IS_ERR(image_root)) {
1558                 error("unable to open subvolume %llu: %ld",
1559                         (unsigned long long)key.objectid, PTR_ERR(image_root));
1560                 goto fail;
1561         }
1562
1563         name = "image";
1564         root_dir = btrfs_root_dirid(&root->root_item);
1565         dir = btrfs_lookup_dir_item(NULL, image_root, &path,
1566                                    root_dir, name, strlen(name), 0);
1567         if (!dir || IS_ERR(dir)) {
1568                 error("unable to find file %s: %ld", name, PTR_ERR(dir));
1569                 goto fail;
1570         }
1571         leaf = path.nodes[0];
1572         btrfs_dir_item_key_to_cpu(leaf, dir, &key);
1573         btrfs_release_path(&path);
1574
1575         objectid = key.objectid;
1576
1577         ret = btrfs_lookup_inode(NULL, image_root, &path, &key, 0);
1578         if (ret) {
1579                 error("unable to find inode item: %d", ret);
1580                 goto fail;
1581         }
1582         leaf = path.nodes[0];
1583         inode = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_inode_item);
1584         total_bytes = btrfs_inode_size(leaf, inode);
1585         btrfs_release_path(&path);
1586
1587         key.objectid = objectid;
1588         key.offset = 0;
1589         key.type = BTRFS_EXTENT_DATA_KEY;
1590         ret = btrfs_search_slot(NULL, image_root, &key, &path, 0, 0);
1591         if (ret != 0) {
1592                 error("unable to find first file extent: %d", ret);
1593                 btrfs_release_path(&path);
1594                 goto fail;
1595         }
1596
1597         /* build mapping tree for the relocated blocks */
1598         for (offset = 0; offset < total_bytes; ) {
1599                 leaf = path.nodes[0];
1600                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1601                         ret = btrfs_next_leaf(root, &path);
1602                         if (ret != 0)
1603                                 break;  
1604                         continue;
1605                 }
1606
1607                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1608                 if (key.objectid != objectid || key.offset != offset ||
1609                     key.type != BTRFS_EXTENT_DATA_KEY)
1610                         break;
1611
1612                 fi = btrfs_item_ptr(leaf, path.slots[0],
1613                                     struct btrfs_file_extent_item);
1614                 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
1615                         break;
1616                 if (btrfs_file_extent_compression(leaf, fi) ||
1617                     btrfs_file_extent_encryption(leaf, fi) ||
1618                     btrfs_file_extent_other_encoding(leaf, fi))
1619                         break;
1620
1621                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1622                 /* skip holes and direct mapped extents */
1623                 if (bytenr == 0 || bytenr == offset)
1624                         goto next_extent;
1625
1626                 bytenr += btrfs_file_extent_offset(leaf, fi);
1627                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
1628
1629                 cache1 = btrfs_lookup_block_group(root->fs_info, offset);
1630                 cache2 = btrfs_lookup_block_group(root->fs_info,
1631                                                   offset + num_bytes - 1);
1632                 /*
1633                  * Here we must take both the old and the new convert behavior
1634                  * into consideration.
1635                  * For the old convert case, sigh, there is no consistent chunk
1636                  * type that will cover the extent. META/DATA/SYS are all
1637                  * possible. Just ensure the relocated one is in a SYS chunk.
1638                  * For the new convert case, they are all covered by a DATA chunk.
1639                  *
1640                  * So, there is no valid chunk type check for it now.
1641                  */
1642                 if (cache1 != cache2)
1643                         break;
1644
1645                 set_extent_bits(&io_tree, offset, offset + num_bytes - 1,
1646                                 EXTENT_LOCKED, GFP_NOFS);
1647                 set_state_private(&io_tree, offset, bytenr);
1648 next_extent:
1649                 offset += btrfs_file_extent_num_bytes(leaf, fi);
1650                 path.slots[0]++;
1651         }
1652         btrfs_release_path(&path);
1653
1654         if (offset < total_bytes) {
1655                 error("unable to build extent mapping (offset %llu, total_bytes %llu)",
1656                                 (unsigned long long)offset,
1657                                 (unsigned long long)total_bytes);
1658                 error("converted filesystem after balance is unable to rollback");
1659                 goto fail;
1660         }
1661
1662         first_free = BTRFS_SUPER_INFO_OFFSET + 2 * sectorsize - 1;
1663         first_free &= ~((u64)sectorsize - 1);
1664         /* backup for extent #0 should exist */
1665         if(!test_range_bit(&io_tree, 0, first_free - 1, EXTENT_LOCKED, 1)) {
1666                 error("no backup for the first extent");
1667                 goto fail;
1668         }
1669         /* force no allocation from system block group */
1670         root->fs_info->system_allocs = -1;
1671         trans = btrfs_start_transaction(root, 1);
1672         if (!trans) {
1673                 error("unable to start transaction");
1674                 goto fail;
1675         }
1676         /*
1677          * recow the whole chunk tree, this will remove all chunk tree blocks
1678          * from system block group
1679          */
1680         chunk_root = root->fs_info->chunk_root;
1681         memset(&key, 0, sizeof(key));
1682         while (1) {
1683                 ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 1);
1684                 if (ret < 0)
1685                         break;
1686
1687                 ret = btrfs_next_leaf(chunk_root, &path);
1688                 if (ret)
1689                         break;
1690
1691                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
1692                 btrfs_release_path(&path);
1693         }
1694         btrfs_release_path(&path);
1695
1696         offset = 0;
1697         num_bytes = 0;
1698         while(1) {
1699                 cache1 = btrfs_lookup_block_group(root->fs_info, offset);
1700                 if (!cache1)
1701                         break;
1702
1703                 if (cache1->flags & BTRFS_BLOCK_GROUP_SYSTEM)
1704                         num_bytes += btrfs_block_group_used(&cache1->item);
1705
1706                 offset = cache1->key.objectid + cache1->key.offset;
1707         }
1708         /* only extent #0 left in system block group? */
1709         if (num_bytes > first_free) {
1710                 error(
1711         "unable to empty system block group (num_bytes %llu, first_free %llu",
1712                                 (unsigned long long)num_bytes,
1713                                 (unsigned long long)first_free);
1714                 goto fail;
1715         }
1716         /* create a system chunk that maps the whole device */
1717         ret = prepare_system_chunk_sb(root->fs_info->super_copy);
1718         if (ret) {
1719                 error("unable to update system chunk: %d", ret);
1720                 goto fail;
1721         }
1722
1723         ret = btrfs_commit_transaction(trans, root);
1724         if (ret) {
1725                 error("transaction commit failed: %d", ret);
1726                 goto fail;
1727         }
1728
1729         ret = close_ctree(root);
1730         if (ret) {
1731                 error("close_ctree failed: %d", ret);
1732                 goto fail;
1733         }
1734
1735         /* zero btrfs super block mirrors */
1736         memset(buf, 0, sectorsize);
1737         for (i = 1 ; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1738                 bytenr = btrfs_sb_offset(i);
1739                 if (bytenr >= total_bytes)
1740                         break;
1741                 ret = pwrite(fd, buf, sectorsize, bytenr);
1742                 if (ret != sectorsize) {
1743                         error("zeroing superblock mirror %d failed: %d",
1744                                         i, ret);
1745                         goto fail;
1746                 }
1747         }
1748
1749         sb_bytenr = (u64)-1;
1750         /* copy all relocated blocks back */
1751         while(1) {
1752                 ret = find_first_extent_bit(&io_tree, 0, &start, &end,
1753                                             EXTENT_LOCKED);
1754                 if (ret)
1755                         break;
1756
1757                 ret = get_state_private(&io_tree, start, &bytenr);
1758                 BUG_ON(ret);
1759
1760                 clear_extent_bits(&io_tree, start, end, EXTENT_LOCKED,
1761                                   GFP_NOFS);
1762
1763                 while (start <= end) {
1764                         if (start == BTRFS_SUPER_INFO_OFFSET) {
1765                                 sb_bytenr = bytenr;
1766                                 goto next_sector;
1767                         }
1768                         ret = pread(fd, buf, sectorsize, bytenr);
1769                         if (ret < 0) {
1770                                 error("reading superblock at %llu failed: %d",
1771                                                 (unsigned long long)bytenr, ret);
1772                                 goto fail;
1773                         }
1774                         BUG_ON(ret != sectorsize);
1775                         ret = pwrite(fd, buf, sectorsize, start);
1776                         if (ret < 0) {
1777                                 error("writing superblock at %llu failed: %d",
1778                                                 (unsigned long long)start, ret);
1779                                 goto fail;
1780                         }
1781                         BUG_ON(ret != sectorsize);
1782 next_sector:
1783                         start += sectorsize;
1784                         bytenr += sectorsize;
1785                 }
1786         }
1787
1788         ret = fsync(fd);
1789         if (ret < 0) {
1790                 error("fsync failed: %s", strerror(errno));
1791                 goto fail;
1792         }
1793         /*
1794          * finally, overwrite btrfs super block.
1795          */
1796         ret = pread(fd, buf, sectorsize, sb_bytenr);
1797         if (ret < 0) {
1798                 error("reading primary superblock failed: %s",
1799                                 strerror(errno));
1800                 goto fail;
1801         }
1802         BUG_ON(ret != sectorsize);
1803         ret = pwrite(fd, buf, sectorsize, BTRFS_SUPER_INFO_OFFSET);
1804         if (ret < 0) {
1805                 error("writing primary superblock failed: %s",
1806                                 strerror(errno));
1807                 goto fail;
1808         }
1809         BUG_ON(ret != sectorsize);
1810         ret = fsync(fd);
1811         if (ret < 0) {
1812                 error("fsync failed: %s", strerror(errno));
1813                 goto fail;
1814         }
1815
1816         close(fd);
1817         free(buf);
1818         extent_io_tree_cleanup(&io_tree);
1819         printf("rollback complete\n");
1820         return 0;
1821
1822 fail:
1823         if (fd != -1)
1824                 close(fd);
1825         free(buf);
1826         error("rollback aborted");
1827         return -1;
1828 }
1829
1830 static void print_usage(void)
1831 {
1832         printf("usage: btrfs-convert [options] device\n");
1833         printf("options:\n");
1834         printf("\t-d|--no-datasum        disable data checksum, sets NODATASUM\n");
1835         printf("\t-i|--no-xattr          ignore xattrs and ACLs\n");
1836         printf("\t-n|--no-inline         disable inlining of small files to metadata\n");
1837         printf("\t-N|--nodesize SIZE     set filesystem metadata nodesize\n");
1838         printf("\t-r|--rollback          roll back to the original filesystem\n");
1839         printf("\t-l|--label LABEL       set filesystem label\n");
1840         printf("\t-L|--copy-label        use label from converted filesystem\n");
1841         printf("\t-p|--progress          show converting progress (default)\n");
1842         printf("\t-O|--features LIST     comma separated list of filesystem features\n");
1843         printf("\t--no-progress          show only overview, not the detailed progress\n");
1844         printf("\n");
1845         printf("Supported filesystems:\n");
1846         printf("\text2/3/4: %s\n", BTRFSCONVERT_EXT2 ? "yes" : "no");
1847 }
1848
1849 int main(int argc, char *argv[])
1850 {
1851         int ret;
1852         int packing = 1;
1853         int noxattr = 0;
1854         int datacsum = 1;
1855         u32 nodesize = max_t(u32, sysconf(_SC_PAGESIZE),
1856                         BTRFS_MKFS_DEFAULT_NODE_SIZE);
1857         int rollback = 0;
1858         int copylabel = 0;
1859         int usage_error = 0;
1860         int progress = 1;
1861         char *file;
1862         char fslabel[BTRFS_LABEL_SIZE];
1863         u64 features = BTRFS_MKFS_DEFAULT_FEATURES;
1864
1865         while(1) {
1866                 enum { GETOPT_VAL_NO_PROGRESS = 256 };
1867                 static const struct option long_options[] = {
1868                         { "no-progress", no_argument, NULL,
1869                                 GETOPT_VAL_NO_PROGRESS },
1870                         { "no-datasum", no_argument, NULL, 'd' },
1871                         { "no-inline", no_argument, NULL, 'n' },
1872                         { "no-xattr", no_argument, NULL, 'i' },
1873                         { "rollback", no_argument, NULL, 'r' },
1874                         { "features", required_argument, NULL, 'O' },
1875                         { "progress", no_argument, NULL, 'p' },
1876                         { "label", required_argument, NULL, 'l' },
1877                         { "copy-label", no_argument, NULL, 'L' },
1878                         { "nodesize", required_argument, NULL, 'N' },
1879                         { "help", no_argument, NULL, GETOPT_VAL_HELP},
1880                         { NULL, 0, NULL, 0 }
1881                 };
1882                 int c = getopt_long(argc, argv, "dinN:rl:LpO:", long_options, NULL);
1883
1884                 if (c < 0)
1885                         break;
1886                 switch(c) {
1887                         case 'd':
1888                                 datacsum = 0;
1889                                 break;
1890                         case 'i':
1891                                 noxattr = 1;
1892                                 break;
1893                         case 'n':
1894                                 packing = 0;
1895                                 break;
1896                         case 'N':
1897                                 nodesize = parse_size(optarg);
1898                                 break;
1899                         case 'r':
1900                                 rollback = 1;
1901                                 break;
1902                         case 'l':
1903                                 copylabel = -1;
1904                                 if (strlen(optarg) >= BTRFS_LABEL_SIZE) {
1905                                         warning(
1906                                         "label too long, trimmed to %d bytes",
1907                                                 BTRFS_LABEL_SIZE - 1);
1908                                 }
1909                                 __strncpy_null(fslabel, optarg, BTRFS_LABEL_SIZE - 1);
1910                                 break;
1911                         case 'L':
1912                                 copylabel = 1;
1913                                 break;
1914                         case 'p':
1915                                 progress = 1;
1916                                 break;
1917                         case 'O': {
1918                                 char *orig = strdup(optarg);
1919                                 char *tmp = orig;
1920
1921                                 tmp = btrfs_parse_fs_features(tmp, &features);
1922                                 if (tmp) {
1923                                         error("unrecognized filesystem feature: %s",
1924                                                         tmp);
1925                                         free(orig);
1926                                         exit(1);
1927                                 }
1928                                 free(orig);
1929                                 if (features & BTRFS_FEATURE_LIST_ALL) {
1930                                         btrfs_list_all_fs_features(
1931                                                 ~BTRFS_CONVERT_ALLOWED_FEATURES);
1932                                         exit(0);
1933                                 }
1934                                 if (features & ~BTRFS_CONVERT_ALLOWED_FEATURES) {
1935                                         char buf[64];
1936
1937                                         btrfs_parse_features_to_string(buf,
1938                                                 features & ~BTRFS_CONVERT_ALLOWED_FEATURES);
1939                                         error("features not allowed for convert: %s",
1940                                                 buf);
1941                                         exit(1);
1942                                 }
1943
1944                                 break;
1945                                 }
1946                         case GETOPT_VAL_NO_PROGRESS:
1947                                 progress = 0;
1948                                 break;
1949                         case GETOPT_VAL_HELP:
1950                         default:
1951                                 print_usage();
1952                                 return c != GETOPT_VAL_HELP;
1953                 }
1954         }
1955         set_argv0(argv);
1956         if (check_argc_exact(argc - optind, 1)) {
1957                 print_usage();
1958                 return 1;
1959         }
1960
1961         if (rollback && (!datacsum || noxattr || !packing)) {
1962                 fprintf(stderr,
1963                         "Usage error: -d, -i, -n options do not apply to rollback\n");
1964                 usage_error++;
1965         }
1966
1967         if (usage_error) {
1968                 print_usage();
1969                 return 1;
1970         }
1971
1972         file = argv[optind];
1973         ret = check_mounted(file);
1974         if (ret < 0) {
1975                 error("could not check mount status: %s", strerror(-ret));
1976                 return 1;
1977         } else if (ret) {
1978                 error("%s is mounted", file);
1979                 return 1;
1980         }
1981
1982         if (rollback) {
1983                 ret = do_rollback(file);
1984         } else {
1985                 ret = do_convert(file, datacsum, packing, noxattr, nodesize,
1986                                 copylabel, fslabel, progress, features);
1987         }
1988         if (ret)
1989                 return 1;
1990         return 0;
1991 }