b115e24fb28d9506b2982dff0812234fcab0cc64
[platform/upstream/btrfs-progs.git] / convert / main.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include "kerncompat.h"
20
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <sys/types.h>
24 #include <fcntl.h>
25 #include <unistd.h>
26 #include <getopt.h>
27
28 #include "ctree.h"
29 #include "disk-io.h"
30 #include "volumes.h"
31 #include "transaction.h"
32 #include "utils.h"
33 #include "task-utils.h"
34 #include "help.h"
35 #include "mkfs/common.h"
36 #include "convert/common.h"
37 #include "convert/source-fs.h"
38 #include "fsfeatures.h"
39
/*
 * Declaration only: this file never initializes the object, so without
 * `extern` the line would be a tentative definition of an all-zero const
 * struct that can clash at link time with the real definition (presumably
 * provided by the ext2 backend - confirm against the build).
 */
extern const struct btrfs_convert_operations ext2_convert_ops;

/* Source-filesystem backends selected at build time. */
static const struct btrfs_convert_operations *convert_operations[] = {
#if BTRFSCONVERT_EXT2
        &ext2_convert_ops,
#endif
};
47
48 static void *print_copied_inodes(void *p)
49 {
50         struct task_ctx *priv = p;
51         const char work_indicator[] = { '.', 'o', 'O', 'o' };
52         u64 count = 0;
53
54         task_period_start(priv->info, 1000 /* 1s */);
55         while (1) {
56                 count++;
57                 printf("copy inodes [%c] [%10llu/%10llu]\r",
58                        work_indicator[count % 4],
59                        (unsigned long long)priv->cur_copy_inodes,
60                        (unsigned long long)priv->max_copy_inodes);
61                 fflush(stdout);
62                 task_period_wait(priv->info);
63         }
64
65         return NULL;
66 }
67
/*
 * Terminate the '\r'-refreshed progress line with a newline and flush stdout.
 *
 * @p is unused.  Always returns 0.
 */
static int after_copied_inodes(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
75
76 static inline int copy_inodes(struct btrfs_convert_context *cctx,
77                               struct btrfs_root *root, u32 convert_flags,
78                               struct task_ctx *p)
79 {
80         return cctx->convert_ops->copy_inodes(cctx, root, convert_flags, p);
81 }
82
83 static inline void convert_close_fs(struct btrfs_convert_context *cctx)
84 {
85         cctx->convert_ops->close_fs(cctx);
86 }
87
88 static inline int convert_check_state(struct btrfs_convert_context *cctx)
89 {
90         return cctx->convert_ops->check_state(cctx);
91 }
92
93 static int csum_disk_extent(struct btrfs_trans_handle *trans,
94                             struct btrfs_root *root,
95                             u64 disk_bytenr, u64 num_bytes)
96 {
97         u32 blocksize = root->sectorsize;
98         u64 offset;
99         char *buffer;
100         int ret = 0;
101
102         buffer = malloc(blocksize);
103         if (!buffer)
104                 return -ENOMEM;
105         for (offset = 0; offset < num_bytes; offset += blocksize) {
106                 ret = read_disk_extent(root, disk_bytenr + offset,
107                                         blocksize, buffer);
108                 if (ret)
109                         break;
110                 ret = btrfs_csum_file_block(trans,
111                                             root->fs_info->csum_root,
112                                             disk_bytenr + num_bytes,
113                                             disk_bytenr + offset,
114                                             buffer, blocksize);
115                 if (ret)
116                         break;
117         }
118         free(buffer);
119         return ret;
120 }
121
122 static int create_image_file_range(struct btrfs_trans_handle *trans,
123                                       struct btrfs_root *root,
124                                       struct cache_tree *used,
125                                       struct btrfs_inode_item *inode,
126                                       u64 ino, u64 bytenr, u64 *ret_len,
127                                       u32 convert_flags)
128 {
129         struct cache_extent *cache;
130         struct btrfs_block_group_cache *bg_cache;
131         u64 len = *ret_len;
132         u64 disk_bytenr;
133         int i;
134         int ret;
135         u32 datacsum = convert_flags & CONVERT_FLAG_DATACSUM;
136
137         if (bytenr != round_down(bytenr, root->sectorsize)) {
138                 error("bytenr not sectorsize aligned: %llu",
139                                 (unsigned long long)bytenr);
140                 return -EINVAL;
141         }
142         if (len != round_down(len, root->sectorsize)) {
143                 error("length not sectorsize aligned: %llu",
144                                 (unsigned long long)len);
145                 return -EINVAL;
146         }
147         len = min_t(u64, len, BTRFS_MAX_EXTENT_SIZE);
148
149         /*
150          * Skip sb ranges first
151          * [0, 1M), [sb_offset(1), +64K), [sb_offset(2), +64K].
152          *
153          * Or we will insert a hole into current image file, and later
154          * migrate block will fail as there is already a file extent.
155          */
156         if (bytenr < 1024 * 1024) {
157                 *ret_len = 1024 * 1024 - bytenr;
158                 return 0;
159         }
160         for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
161                 u64 cur = btrfs_sb_offset(i);
162
163                 if (bytenr >= cur && bytenr < cur + BTRFS_STRIPE_LEN) {
164                         *ret_len = cur + BTRFS_STRIPE_LEN - bytenr;
165                         return 0;
166                 }
167         }
168         for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
169                 u64 cur = btrfs_sb_offset(i);
170
171                 /*
172                  *      |--reserved--|
173                  * |----range-------|
174                  * May still need to go through file extent inserts
175                  */
176                 if (bytenr < cur && bytenr + len >= cur) {
177                         len = min_t(u64, len, cur - bytenr);
178                         break;
179                 }
180                 /*
181                  * |--reserved--|
182                  *      |---range---|
183                  * Drop out, no need to insert anything
184                  */
185                 if (bytenr >= cur && bytenr < cur + BTRFS_STRIPE_LEN) {
186                         *ret_len = cur + BTRFS_STRIPE_LEN - bytenr;
187                         return 0;
188                 }
189         }
190
191         cache = search_cache_extent(used, bytenr);
192         if (cache) {
193                 if (cache->start <= bytenr) {
194                         /*
195                          * |///////Used///////|
196                          *      |<--insert--->|
197                          *      bytenr
198                          */
199                         len = min_t(u64, len, cache->start + cache->size -
200                                     bytenr);
201                         disk_bytenr = bytenr;
202                 } else {
203                         /*
204                          *              |//Used//|
205                          *  |<-insert-->|
206                          *  bytenr
207                          */
208                         len = min(len, cache->start - bytenr);
209                         disk_bytenr = 0;
210                         datacsum = 0;
211                 }
212         } else {
213                 /*
214                  * |//Used//|           |EOF
215                  *          |<-insert-->|
216                  *          bytenr
217                  */
218                 disk_bytenr = 0;
219                 datacsum = 0;
220         }
221
222         if (disk_bytenr) {
223                 /* Check if the range is in a data block group */
224                 bg_cache = btrfs_lookup_block_group(root->fs_info, bytenr);
225                 if (!bg_cache)
226                         return -ENOENT;
227                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
228                         return -EINVAL;
229
230                 /* The extent should never cross block group boundary */
231                 len = min_t(u64, len, bg_cache->key.objectid +
232                             bg_cache->key.offset - bytenr);
233         }
234
235         if (len != round_down(len, root->sectorsize)) {
236                 error("remaining length not sectorsize aligned: %llu",
237                                 (unsigned long long)len);
238                 return -EINVAL;
239         }
240         ret = btrfs_record_file_extent(trans, root, ino, inode, bytenr,
241                                        disk_bytenr, len);
242         if (ret < 0)
243                 return ret;
244
245         if (datacsum)
246                 ret = csum_disk_extent(trans, root, bytenr, len);
247         *ret_len = len;
248         return ret;
249 }
250
251 /*
252  * Relocate old fs data in one reserved ranges
253  *
254  * Since all old fs data in reserved range is not covered by any chunk nor
255  * data extent, we don't need to handle any reference but add new
256  * extent/reference, which makes codes more clear
257  */
258 static int migrate_one_reserved_range(struct btrfs_trans_handle *trans,
259                                       struct btrfs_root *root,
260                                       struct cache_tree *used,
261                                       struct btrfs_inode_item *inode, int fd,
262                                       u64 ino, u64 start, u64 len,
263                                       u32 convert_flags)
264 {
265         u64 cur_off = start;
266         u64 cur_len = len;
267         u64 hole_start = start;
268         u64 hole_len;
269         struct cache_extent *cache;
270         struct btrfs_key key;
271         struct extent_buffer *eb;
272         int ret = 0;
273
274         while (cur_off < start + len) {
275                 cache = lookup_cache_extent(used, cur_off, cur_len);
276                 if (!cache)
277                         break;
278                 cur_off = max(cache->start, cur_off);
279                 cur_len = min(cache->start + cache->size, start + len) -
280                           cur_off;
281                 BUG_ON(cur_len < root->sectorsize);
282
283                 /* reserve extent for the data */
284                 ret = btrfs_reserve_extent(trans, root, cur_len, 0, 0, (u64)-1,
285                                            &key, 1);
286                 if (ret < 0)
287                         break;
288
289                 eb = malloc(sizeof(*eb) + cur_len);
290                 if (!eb) {
291                         ret = -ENOMEM;
292                         break;
293                 }
294
295                 ret = pread(fd, eb->data, cur_len, cur_off);
296                 if (ret < cur_len) {
297                         ret = (ret < 0 ? ret : -EIO);
298                         free(eb);
299                         break;
300                 }
301                 eb->start = key.objectid;
302                 eb->len = key.offset;
303
304                 /* Write the data */
305                 ret = write_and_map_eb(trans, root, eb);
306                 free(eb);
307                 if (ret < 0)
308                         break;
309
310                 /* Now handle extent item and file extent things */
311                 ret = btrfs_record_file_extent(trans, root, ino, inode, cur_off,
312                                                key.objectid, key.offset);
313                 if (ret < 0)
314                         break;
315                 /* Finally, insert csum items */
316                 if (convert_flags & CONVERT_FLAG_DATACSUM)
317                         ret = csum_disk_extent(trans, root, key.objectid,
318                                                key.offset);
319
320                 /* Don't forget to insert hole */
321                 hole_len = cur_off - hole_start;
322                 if (hole_len) {
323                         ret = btrfs_record_file_extent(trans, root, ino, inode,
324                                         hole_start, 0, hole_len);
325                         if (ret < 0)
326                                 break;
327                 }
328
329                 cur_off += key.offset;
330                 hole_start = cur_off;
331                 cur_len = start + len - cur_off;
332         }
333         /* Last hole */
334         if (start + len - hole_start > 0)
335                 ret = btrfs_record_file_extent(trans, root, ino, inode,
336                                 hole_start, 0, start + len - hole_start);
337         return ret;
338 }
339
340 /*
341  * Relocate the used ext2 data in reserved ranges
342  * [0,1M)
343  * [btrfs_sb_offset(1), +BTRFS_STRIPE_LEN)
344  * [btrfs_sb_offset(2), +BTRFS_STRIPE_LEN)
345  */
346 static int migrate_reserved_ranges(struct btrfs_trans_handle *trans,
347                                    struct btrfs_root *root,
348                                    struct cache_tree *used,
349                                    struct btrfs_inode_item *inode, int fd,
350                                    u64 ino, u64 total_bytes, u32 convert_flags)
351 {
352         u64 cur_off;
353         u64 cur_len;
354         int ret = 0;
355
356         /* 0 ~ 1M */
357         cur_off = 0;
358         cur_len = 1024 * 1024;
359         ret = migrate_one_reserved_range(trans, root, used, inode, fd, ino,
360                                          cur_off, cur_len, convert_flags);
361         if (ret < 0)
362                 return ret;
363
364         /* second sb(fisrt sb is included in 0~1M) */
365         cur_off = btrfs_sb_offset(1);
366         cur_len = min(total_bytes, cur_off + BTRFS_STRIPE_LEN) - cur_off;
367         if (cur_off > total_bytes)
368                 return ret;
369         ret = migrate_one_reserved_range(trans, root, used, inode, fd, ino,
370                                          cur_off, cur_len, convert_flags);
371         if (ret < 0)
372                 return ret;
373
374         /* Last sb */
375         cur_off = btrfs_sb_offset(2);
376         cur_len = min(total_bytes, cur_off + BTRFS_STRIPE_LEN) - cur_off;
377         if (cur_off > total_bytes)
378                 return ret;
379         ret = migrate_one_reserved_range(trans, root, used, inode, fd, ino,
380                                          cur_off, cur_len, convert_flags);
381         return ret;
382 }
383
384 /*
385  * Helper for expand and merge extent_cache for wipe_one_reserved_range() to
386  * handle wiping a range that exists in cache.
387  */
388 static int _expand_extent_cache(struct cache_tree *tree,
389                                 struct cache_extent *entry,
390                                 u64 min_stripe_size, int backward)
391 {
392         struct cache_extent *ce;
393         int diff;
394
395         if (entry->size >= min_stripe_size)
396                 return 0;
397         diff = min_stripe_size - entry->size;
398
399         if (backward) {
400                 ce = prev_cache_extent(entry);
401                 if (!ce)
402                         goto expand_back;
403                 if (ce->start + ce->size >= entry->start - diff) {
404                         /* Directly merge with previous extent */
405                         ce->size = entry->start + entry->size - ce->start;
406                         remove_cache_extent(tree, entry);
407                         free(entry);
408                         return 0;
409                 }
410 expand_back:
411                 /* No overlap, normal extent */
412                 if (entry->start < diff) {
413                         error("cannot find space for data chunk layout");
414                         return -ENOSPC;
415                 }
416                 entry->start -= diff;
417                 entry->size += diff;
418                 return 0;
419         }
420         ce = next_cache_extent(entry);
421         if (!ce)
422                 goto expand_after;
423         if (entry->start + entry->size + diff >= ce->start) {
424                 /* Directly merge with next extent */
425                 entry->size = ce->start + ce->size - entry->start;
426                 remove_cache_extent(tree, ce);
427                 free(ce);
428                 return 0;
429         }
430 expand_after:
431         entry->size += diff;
432         return 0;
433 }
434
/*
 * Remove one reserved range from the given cache tree.
 *
 * @tree:            cache tree to wipe [@start, @start + @len) from
 * @start, @len:     range to wipe; both ends must be BTRFS_STRIPE_LEN aligned
 * @min_stripe_size: when non-zero it must be at least 2 * @len and at least
 *                   2 * BTRFS_STRIPE_LEN; half of it is the size limit
 *                   applied when @ensure_size is set
 * @ensure_size:     when set, the pieces left by a split are expanded to
 *                   @min_stripe_size / 2, and the trimmed extent in the
 *                   other cases is asserted to be at least that large
 *
 * Only the single extent returned by lookup_cache_extent() is processed.
 * Violated preconditions trigger BUG_ON().
 *
 * Returns 0 when nothing had to be wiped or on success, otherwise the error
 * from add_merge_cache_extent() / _expand_extent_cache(), or -ENOENT when the
 * freshly inserted tail extent cannot be found back unchanged.
 */
static int wipe_one_reserved_range(struct cache_tree *tree,
                                   u64 start, u64 len, u64 min_stripe_size,
                                   int ensure_size)
{
        struct cache_extent *cache;
        int ret;

        BUG_ON(ensure_size && min_stripe_size == 0);
        /*
         * The logic here is simplified to handle special cases only,
         * so we don't need to consider the merge case for ensure_size
         */
        BUG_ON(min_stripe_size && (min_stripe_size < len * 2 ||
               min_stripe_size / 2 < BTRFS_STRIPE_LEN));

        /* Also, wipe range should already be aligned */
        BUG_ON(start != round_down(start, BTRFS_STRIPE_LEN) ||
               start + len != round_up(start + len, BTRFS_STRIPE_LEN));

        /* From here on min_stripe_size holds the per-piece limit */
        min_stripe_size /= 2;

        cache = lookup_cache_extent(tree, start, len);
        if (!cache)
                return 0;

        if (start <= cache->start) {
                /*
                 *      |--------cache---------|
                 * |-wipe-|
                 */
                BUG_ON(start + len <= cache->start);

                /*
                 * The wipe size is smaller than min_stripe_size / 2,
                 * so the result length should still meet min_stripe_size
                 * And no need to do alignment
                 */
                /*
                 * NOTE(review): assumes the wipe range ends at or before the
                 * end of the extent, otherwise this subtraction wraps -
                 * confirm against callers.
                 */
                cache->size -= (start + len - cache->start);
                if (cache->size == 0) {
                        /* The whole extent was covered by the wipe range */
                        remove_cache_extent(tree, cache);
                        free(cache);
                        return 0;
                }

                BUG_ON(ensure_size && cache->size < min_stripe_size);

                /* Size was already reduced above, now move the start */
                cache->start = start + len;
                return 0;
        } else if (start > cache->start && start + len < cache->start +
                   cache->size) {
                /*
                 * |-------cache-----|
                 *      |-wipe-|
                 */
                /* Snapshot the extent, @cache may be freed by the expansion */
                u64 old_start = cache->start;
                u64 old_len = cache->size;
                u64 insert_start = start + len;
                u64 insert_len;

                /* Shrink the existing extent to the part before the wipe */
                cache->size = start - cache->start;
                /* Expand the leading half part if needed */
                if (ensure_size && cache->size < min_stripe_size) {
                        ret = _expand_extent_cache(tree, cache,
                                        min_stripe_size, 1);
                        if (ret < 0)
                                return ret;
                }

                /* And insert the new one */
                insert_len = old_start + old_len - start - len;
                ret = add_merge_cache_extent(tree, insert_start, insert_len);
                if (ret < 0)
                        return ret;

                /* Expand the last half part if needed */
                if (ensure_size && insert_len < min_stripe_size) {
                        cache = lookup_cache_extent(tree, insert_start,
                                                    insert_len);
                        /*
                         * The tail must come back exactly as inserted; a
                         * different start or size is treated as an error.
                         */
                        if (!cache || cache->start != insert_start ||
                            cache->size != insert_len)
                                return -ENOENT;
                        ret = _expand_extent_cache(tree, cache,
                                        min_stripe_size, 0);
                }

                return ret;
        }
        /*
         * |----cache-----|
         *              |--wipe-|
         * Wipe len should be small enough and no need to expand the
         * remaining extent
         */
        cache->size = start - cache->start;
        BUG_ON(ensure_size && cache->size < min_stripe_size);
        return 0;
}
537
538 /*
539  * Remove reserved ranges from given cache_tree
540  *
541  * It will remove the following ranges
542  * 1) 0~1M
543  * 2) 2nd superblock, +64K (make sure chunks are 64K aligned)
544  * 3) 3rd superblock, +64K
545  *
546  * @min_stripe must be given for safety check
547  * and if @ensure_size is given, it will ensure affected cache_extent will be
548  * larger than min_stripe_size
549  */
550 static int wipe_reserved_ranges(struct cache_tree *tree, u64 min_stripe_size,
551                                 int ensure_size)
552 {
553         int ret;
554
555         ret = wipe_one_reserved_range(tree, 0, 1024 * 1024, min_stripe_size,
556                                       ensure_size);
557         if (ret < 0)
558                 return ret;
559         ret = wipe_one_reserved_range(tree, btrfs_sb_offset(1),
560                         BTRFS_STRIPE_LEN, min_stripe_size, ensure_size);
561         if (ret < 0)
562                 return ret;
563         ret = wipe_one_reserved_range(tree, btrfs_sb_offset(2),
564                         BTRFS_STRIPE_LEN, min_stripe_size, ensure_size);
565         return ret;
566 }
567
568 static int calculate_available_space(struct btrfs_convert_context *cctx)
569 {
570         struct cache_tree *used = &cctx->used_space;
571         struct cache_tree *data_chunks = &cctx->data_chunks;
572         struct cache_tree *free = &cctx->free_space;
573         struct cache_extent *cache;
574         u64 cur_off = 0;
575         /*
576          * Twice the minimal chunk size, to allow later wipe_reserved_ranges()
577          * works without need to consider overlap
578          */
579         u64 min_stripe_size = 2 * 16 * 1024 * 1024;
580         int ret;
581
582         /* Calculate data_chunks */
583         for (cache = first_cache_extent(used); cache;
584              cache = next_cache_extent(cache)) {
585                 u64 cur_len;
586
587                 if (cache->start + cache->size < cur_off)
588                         continue;
589                 if (cache->start > cur_off + min_stripe_size)
590                         cur_off = cache->start;
591                 cur_len = max(cache->start + cache->size - cur_off,
592                               min_stripe_size);
593                 ret = add_merge_cache_extent(data_chunks, cur_off, cur_len);
594                 if (ret < 0)
595                         goto out;
596                 cur_off += cur_len;
597         }
598         /*
599          * remove reserved ranges, so we won't ever bother relocating an old
600          * filesystem extent to other place.
601          */
602         ret = wipe_reserved_ranges(data_chunks, min_stripe_size, 1);
603         if (ret < 0)
604                 goto out;
605
606         cur_off = 0;
607         /*
608          * Calculate free space
609          * Always round up the start bytenr, to avoid metadata extent corss
610          * stripe boundary, as later mkfs_convert() won't have all the extent
611          * allocation check
612          */
613         for (cache = first_cache_extent(data_chunks); cache;
614              cache = next_cache_extent(cache)) {
615                 if (cache->start < cur_off)
616                         continue;
617                 if (cache->start > cur_off) {
618                         u64 insert_start;
619                         u64 len;
620
621                         len = cache->start - round_up(cur_off,
622                                                       BTRFS_STRIPE_LEN);
623                         insert_start = round_up(cur_off, BTRFS_STRIPE_LEN);
624
625                         ret = add_merge_cache_extent(free, insert_start, len);
626                         if (ret < 0)
627                                 goto out;
628                 }
629                 cur_off = cache->start + cache->size;
630         }
631         /* Don't forget the last range */
632         if (cctx->total_bytes > cur_off) {
633                 u64 len = cctx->total_bytes - cur_off;
634                 u64 insert_start;
635
636                 insert_start = round_up(cur_off, BTRFS_STRIPE_LEN);
637
638                 ret = add_merge_cache_extent(free, insert_start, len);
639                 if (ret < 0)
640                         goto out;
641         }
642
643         /* Remove reserved bytes */
644         ret = wipe_reserved_ranges(free, min_stripe_size, 0);
645 out:
646         return ret;
647 }
648
649 /*
650  * Read used space, and since we have the used space,
651  * calcuate data_chunks and free for later mkfs
652  */
653 static int convert_read_used_space(struct btrfs_convert_context *cctx)
654 {
655         int ret;
656
657         ret = cctx->convert_ops->read_used_space(cctx);
658         if (ret)
659                 return ret;
660
661         ret = calculate_available_space(cctx);
662         return ret;
663 }
664
665 /*
666  * Create the fs image file of old filesystem.
667  *
668  * This is completely fs independent as we have cctx->used, only
669  * need to create file extents pointing to all the positions.
670  */
671 static int create_image(struct btrfs_root *root,
672                            struct btrfs_mkfs_config *cfg,
673                            struct btrfs_convert_context *cctx, int fd,
674                            u64 size, char *name, u32 convert_flags)
675 {
676         struct btrfs_inode_item buf;
677         struct btrfs_trans_handle *trans;
678         struct btrfs_path path;
679         struct btrfs_key key;
680         struct cache_extent *cache;
681         struct cache_tree used_tmp;
682         u64 cur;
683         u64 ino;
684         u64 flags = BTRFS_INODE_READONLY;
685         int ret;
686
687         if (!(convert_flags & CONVERT_FLAG_DATACSUM))
688                 flags |= BTRFS_INODE_NODATASUM;
689
690         trans = btrfs_start_transaction(root, 1);
691         if (!trans)
692                 return -ENOMEM;
693
694         cache_tree_init(&used_tmp);
695         btrfs_init_path(&path);
696
697         ret = btrfs_find_free_objectid(trans, root, BTRFS_FIRST_FREE_OBJECTID,
698                                        &ino);
699         if (ret < 0)
700                 goto out;
701         ret = btrfs_new_inode(trans, root, ino, 0400 | S_IFREG);
702         if (ret < 0)
703                 goto out;
704         ret = btrfs_change_inode_flags(trans, root, ino, flags);
705         if (ret < 0)
706                 goto out;
707         ret = btrfs_add_link(trans, root, ino, BTRFS_FIRST_FREE_OBJECTID, name,
708                              strlen(name), BTRFS_FT_REG_FILE, NULL, 1);
709         if (ret < 0)
710                 goto out;
711
712         key.objectid = ino;
713         key.type = BTRFS_INODE_ITEM_KEY;
714         key.offset = 0;
715
716         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
717         if (ret) {
718                 ret = (ret > 0 ? -ENOENT : ret);
719                 goto out;
720         }
721         read_extent_buffer(path.nodes[0], &buf,
722                         btrfs_item_ptr_offset(path.nodes[0], path.slots[0]),
723                         sizeof(buf));
724         btrfs_release_path(&path);
725
726         /*
727          * Create a new used space cache, which doesn't contain the reserved
728          * range
729          */
730         for (cache = first_cache_extent(&cctx->used_space); cache;
731              cache = next_cache_extent(cache)) {
732                 ret = add_cache_extent(&used_tmp, cache->start, cache->size);
733                 if (ret < 0)
734                         goto out;
735         }
736         ret = wipe_reserved_ranges(&used_tmp, 0, 0);
737         if (ret < 0)
738                 goto out;
739
740         /*
741          * Start from 1M, as 0~1M is reserved, and create_image_file_range()
742          * can't handle bytenr 0(will consider it as a hole)
743          */
744         cur = 1024 * 1024;
745         while (cur < size) {
746                 u64 len = size - cur;
747
748                 ret = create_image_file_range(trans, root, &used_tmp,
749                                                 &buf, ino, cur, &len,
750                                                 convert_flags);
751                 if (ret < 0)
752                         goto out;
753                 cur += len;
754         }
755         /* Handle the reserved ranges */
756         ret = migrate_reserved_ranges(trans, root, &cctx->used_space, &buf, fd,
757                         ino, cfg->num_bytes, convert_flags);
758
759         key.objectid = ino;
760         key.type = BTRFS_INODE_ITEM_KEY;
761         key.offset = 0;
762         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
763         if (ret) {
764                 ret = (ret > 0 ? -ENOENT : ret);
765                 goto out;
766         }
767         btrfs_set_stack_inode_size(&buf, cfg->num_bytes);
768         write_extent_buffer(path.nodes[0], &buf,
769                         btrfs_item_ptr_offset(path.nodes[0], path.slots[0]),
770                         sizeof(buf));
771 out:
772         free_extent_cache_tree(&used_tmp);
773         btrfs_release_path(&path);
774         btrfs_commit_transaction(trans, root);
775         return ret;
776 }
777
778 static struct btrfs_root* link_subvol(struct btrfs_root *root,
779                 const char *base, u64 root_objectid)
780 {
781         struct btrfs_trans_handle *trans;
782         struct btrfs_fs_info *fs_info = root->fs_info;
783         struct btrfs_root *tree_root = fs_info->tree_root;
784         struct btrfs_root *new_root = NULL;
785         struct btrfs_path path;
786         struct btrfs_inode_item *inode_item;
787         struct extent_buffer *leaf;
788         struct btrfs_key key;
789         u64 dirid = btrfs_root_dirid(&root->root_item);
790         u64 index = 2;
791         char buf[BTRFS_NAME_LEN + 1]; /* for snprintf null */
792         int len;
793         int i;
794         int ret;
795
796         len = strlen(base);
797         if (len == 0 || len > BTRFS_NAME_LEN)
798                 return NULL;
799
800         btrfs_init_path(&path);
801         key.objectid = dirid;
802         key.type = BTRFS_DIR_INDEX_KEY;
803         key.offset = (u64)-1;
804
805         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
806         if (ret <= 0) {
807                 error("search for DIR_INDEX dirid %llu failed: %d",
808                                 (unsigned long long)dirid, ret);
809                 goto fail;
810         }
811
812         if (path.slots[0] > 0) {
813                 path.slots[0]--;
814                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
815                 if (key.objectid == dirid && key.type == BTRFS_DIR_INDEX_KEY)
816                         index = key.offset + 1;
817         }
818         btrfs_release_path(&path);
819
820         trans = btrfs_start_transaction(root, 1);
821         if (!trans) {
822                 error("unable to start transaction");
823                 goto fail;
824         }
825
826         key.objectid = dirid;
827         key.offset = 0;
828         key.type =  BTRFS_INODE_ITEM_KEY;
829
830         ret = btrfs_lookup_inode(trans, root, &path, &key, 1);
831         if (ret) {
832                 error("search for INODE_ITEM %llu failed: %d",
833                                 (unsigned long long)dirid, ret);
834                 goto fail;
835         }
836         leaf = path.nodes[0];
837         inode_item = btrfs_item_ptr(leaf, path.slots[0],
838                                     struct btrfs_inode_item);
839
840         key.objectid = root_objectid;
841         key.offset = (u64)-1;
842         key.type = BTRFS_ROOT_ITEM_KEY;
843
844         memcpy(buf, base, len);
845         for (i = 0; i < 1024; i++) {
846                 ret = btrfs_insert_dir_item(trans, root, buf, len,
847                                             dirid, &key, BTRFS_FT_DIR, index);
848                 if (ret != -EEXIST)
849                         break;
850                 len = snprintf(buf, ARRAY_SIZE(buf), "%s%d", base, i);
851                 if (len < 1 || len > BTRFS_NAME_LEN) {
852                         ret = -EINVAL;
853                         break;
854                 }
855         }
856         if (ret)
857                 goto fail;
858
859         btrfs_set_inode_size(leaf, inode_item, len * 2 +
860                              btrfs_inode_size(leaf, inode_item));
861         btrfs_mark_buffer_dirty(leaf);
862         btrfs_release_path(&path);
863
864         /* add the backref first */
865         ret = btrfs_add_root_ref(trans, tree_root, root_objectid,
866                                  BTRFS_ROOT_BACKREF_KEY,
867                                  root->root_key.objectid,
868                                  dirid, index, buf, len);
869         if (ret) {
870                 error("unable to add root backref for %llu: %d",
871                                 root->root_key.objectid, ret);
872                 goto fail;
873         }
874
875         /* now add the forward ref */
876         ret = btrfs_add_root_ref(trans, tree_root, root->root_key.objectid,
877                                  BTRFS_ROOT_REF_KEY, root_objectid,
878                                  dirid, index, buf, len);
879         if (ret) {
880                 error("unable to add root ref for %llu: %d",
881                                 root->root_key.objectid, ret);
882                 goto fail;
883         }
884
885         ret = btrfs_commit_transaction(trans, root);
886         if (ret) {
887                 error("transaction commit failed: %d", ret);
888                 goto fail;
889         }
890
891         new_root = btrfs_read_fs_root(fs_info, &key);
892         if (IS_ERR(new_root)) {
893                 error("unable to fs read root: %lu", PTR_ERR(new_root));
894                 new_root = NULL;
895         }
896 fail:
897         btrfs_init_path(&path);
898         return new_root;
899 }
900
901 static int create_subvol(struct btrfs_trans_handle *trans,
902                          struct btrfs_root *root, u64 root_objectid)
903 {
904         struct extent_buffer *tmp;
905         struct btrfs_root *new_root;
906         struct btrfs_key key;
907         struct btrfs_root_item root_item;
908         int ret;
909
910         ret = btrfs_copy_root(trans, root, root->node, &tmp,
911                               root_objectid);
912         if (ret)
913                 return ret;
914
915         memcpy(&root_item, &root->root_item, sizeof(root_item));
916         btrfs_set_root_bytenr(&root_item, tmp->start);
917         btrfs_set_root_level(&root_item, btrfs_header_level(tmp));
918         btrfs_set_root_generation(&root_item, trans->transid);
919         free_extent_buffer(tmp);
920
921         key.objectid = root_objectid;
922         key.type = BTRFS_ROOT_ITEM_KEY;
923         key.offset = trans->transid;
924         ret = btrfs_insert_root(trans, root->fs_info->tree_root,
925                                 &key, &root_item);
926
927         key.offset = (u64)-1;
928         new_root = btrfs_read_fs_root(root->fs_info, &key);
929         if (!new_root || IS_ERR(new_root)) {
930                 error("unable to fs read root: %lu", PTR_ERR(new_root));
931                 return PTR_ERR(new_root);
932         }
933
934         ret = btrfs_make_root_dir(trans, new_root, BTRFS_FIRST_FREE_OBJECTID);
935
936         return ret;
937 }
938
939 /*
940  * New make_btrfs() has handle system and meta chunks quite well.
941  * So only need to add remaining data chunks.
942  */
943 static int make_convert_data_block_groups(struct btrfs_trans_handle *trans,
944                                           struct btrfs_fs_info *fs_info,
945                                           struct btrfs_mkfs_config *cfg,
946                                           struct btrfs_convert_context *cctx)
947 {
948         struct btrfs_root *extent_root = fs_info->extent_root;
949         struct cache_tree *data_chunks = &cctx->data_chunks;
950         struct cache_extent *cache;
951         u64 max_chunk_size;
952         int ret = 0;
953
954         /*
955          * Don't create data chunk over 10% of the convert device
956          * And for single chunk, don't create chunk larger than 1G.
957          */
958         max_chunk_size = cfg->num_bytes / 10;
959         max_chunk_size = min((u64)(1024 * 1024 * 1024), max_chunk_size);
960         max_chunk_size = round_down(max_chunk_size, extent_root->sectorsize);
961
962         for (cache = first_cache_extent(data_chunks); cache;
963              cache = next_cache_extent(cache)) {
964                 u64 cur = cache->start;
965
966                 while (cur < cache->start + cache->size) {
967                         u64 len;
968                         u64 cur_backup = cur;
969
970                         len = min(max_chunk_size,
971                                   cache->start + cache->size - cur);
972                         ret = btrfs_alloc_data_chunk(trans, extent_root,
973                                         &cur_backup, len,
974                                         BTRFS_BLOCK_GROUP_DATA, 1);
975                         if (ret < 0)
976                                 break;
977                         ret = btrfs_make_block_group(trans, extent_root, 0,
978                                         BTRFS_BLOCK_GROUP_DATA,
979                                         BTRFS_FIRST_CHUNK_TREE_OBJECTID,
980                                         cur, len);
981                         if (ret < 0)
982                                 break;
983                         cur += len;
984                 }
985         }
986         return ret;
987 }
988
989 /*
990  * Init the temp btrfs to a operational status.
991  *
992  * It will fix the extent usage accounting(XXX: Do we really need?) and
993  * insert needed data chunks, to ensure all old fs data extents are covered
994  * by DATA chunks, preventing wrong chunks are allocated.
995  *
996  * And also create convert image subvolume and relocation tree.
997  * (XXX: Not need again?)
998  * But the convert image subvolume is *NOT* linked to fs tree yet.
999  */
1000 static int init_btrfs(struct btrfs_mkfs_config *cfg, struct btrfs_root *root,
1001                          struct btrfs_convert_context *cctx, u32 convert_flags)
1002 {
1003         struct btrfs_key location;
1004         struct btrfs_trans_handle *trans;
1005         struct btrfs_fs_info *fs_info = root->fs_info;
1006         int ret;
1007
1008         /*
1009          * Don't alloc any metadata/system chunk, as we don't want
1010          * any meta/sys chunk allcated before all data chunks are inserted.
1011          * Or we screw up the chunk layout just like the old implement.
1012          */
1013         fs_info->avoid_sys_chunk_alloc = 1;
1014         fs_info->avoid_meta_chunk_alloc = 1;
1015         trans = btrfs_start_transaction(root, 1);
1016         if (!trans) {
1017                 error("unable to start transaction");
1018                 ret = -EINVAL;
1019                 goto err;
1020         }
1021         ret = btrfs_fix_block_accounting(trans, root);
1022         if (ret)
1023                 goto err;
1024         ret = make_convert_data_block_groups(trans, fs_info, cfg, cctx);
1025         if (ret)
1026                 goto err;
1027         ret = btrfs_make_root_dir(trans, fs_info->tree_root,
1028                                   BTRFS_ROOT_TREE_DIR_OBJECTID);
1029         if (ret)
1030                 goto err;
1031         memcpy(&location, &root->root_key, sizeof(location));
1032         location.offset = (u64)-1;
1033         ret = btrfs_insert_dir_item(trans, fs_info->tree_root, "default", 7,
1034                                 btrfs_super_root_dir(fs_info->super_copy),
1035                                 &location, BTRFS_FT_DIR, 0);
1036         if (ret)
1037                 goto err;
1038         ret = btrfs_insert_inode_ref(trans, fs_info->tree_root, "default", 7,
1039                                 location.objectid,
1040                                 btrfs_super_root_dir(fs_info->super_copy), 0);
1041         if (ret)
1042                 goto err;
1043         btrfs_set_root_dirid(&fs_info->fs_root->root_item,
1044                              BTRFS_FIRST_FREE_OBJECTID);
1045
1046         /* subvol for fs image file */
1047         ret = create_subvol(trans, root, CONV_IMAGE_SUBVOL_OBJECTID);
1048         if (ret < 0) {
1049                 error("failed to create subvolume image root: %d", ret);
1050                 goto err;
1051         }
1052         /* subvol for data relocation tree */
1053         ret = create_subvol(trans, root, BTRFS_DATA_RELOC_TREE_OBJECTID);
1054         if (ret < 0) {
1055                 error("failed to create DATA_RELOC root: %d", ret);
1056                 goto err;
1057         }
1058
1059         ret = btrfs_commit_transaction(trans, root);
1060         fs_info->avoid_sys_chunk_alloc = 0;
1061         fs_info->avoid_meta_chunk_alloc = 0;
1062 err:
1063         return ret;
1064 }
1065
1066 /*
1067  * Migrate super block to its default position and zero 0 ~ 16k
1068  */
1069 static int migrate_super_block(int fd, u64 old_bytenr)
1070 {
1071         int ret;
1072         struct extent_buffer *buf;
1073         struct btrfs_super_block *super;
1074         u32 len;
1075         u32 bytenr;
1076
1077         buf = malloc(sizeof(*buf) + BTRFS_SUPER_INFO_SIZE);
1078         if (!buf)
1079                 return -ENOMEM;
1080
1081         buf->len = BTRFS_SUPER_INFO_SIZE;
1082         ret = pread(fd, buf->data, BTRFS_SUPER_INFO_SIZE, old_bytenr);
1083         if (ret != BTRFS_SUPER_INFO_SIZE)
1084                 goto fail;
1085
1086         super = (struct btrfs_super_block *)buf->data;
1087         BUG_ON(btrfs_super_bytenr(super) != old_bytenr);
1088         btrfs_set_super_bytenr(super, BTRFS_SUPER_INFO_OFFSET);
1089
1090         csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
1091         ret = pwrite(fd, buf->data, BTRFS_SUPER_INFO_SIZE,
1092                 BTRFS_SUPER_INFO_OFFSET);
1093         if (ret != BTRFS_SUPER_INFO_SIZE)
1094                 goto fail;
1095
1096         ret = fsync(fd);
1097         if (ret)
1098                 goto fail;
1099
1100         memset(buf->data, 0, BTRFS_SUPER_INFO_SIZE);
1101         for (bytenr = 0; bytenr < BTRFS_SUPER_INFO_OFFSET; ) {
1102                 len = BTRFS_SUPER_INFO_OFFSET - bytenr;
1103                 if (len > BTRFS_SUPER_INFO_SIZE)
1104                         len = BTRFS_SUPER_INFO_SIZE;
1105                 ret = pwrite(fd, buf->data, len, bytenr);
1106                 if (ret != len) {
1107                         fprintf(stderr, "unable to zero fill device\n");
1108                         break;
1109                 }
1110                 bytenr += len;
1111         }
1112         ret = 0;
1113         fsync(fd);
1114 fail:
1115         free(buf);
1116         if (ret > 0)
1117                 ret = -1;
1118         return ret;
1119 }
1120
1121 static int prepare_system_chunk_sb(struct btrfs_super_block *super)
1122 {
1123         struct btrfs_chunk *chunk;
1124         struct btrfs_disk_key *key;
1125         u32 sectorsize = btrfs_super_sectorsize(super);
1126
1127         key = (struct btrfs_disk_key *)(super->sys_chunk_array);
1128         chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
1129                                        sizeof(struct btrfs_disk_key));
1130
1131         btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
1132         btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
1133         btrfs_set_disk_key_offset(key, 0);
1134
1135         btrfs_set_stack_chunk_length(chunk, btrfs_super_total_bytes(super));
1136         btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
1137         btrfs_set_stack_chunk_stripe_len(chunk, BTRFS_STRIPE_LEN);
1138         btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
1139         btrfs_set_stack_chunk_io_align(chunk, sectorsize);
1140         btrfs_set_stack_chunk_io_width(chunk, sectorsize);
1141         btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
1142         btrfs_set_stack_chunk_num_stripes(chunk, 1);
1143         btrfs_set_stack_chunk_sub_stripes(chunk, 0);
1144         chunk->stripe.devid = super->dev_item.devid;
1145         btrfs_set_stack_stripe_offset(&chunk->stripe, 0);
1146         memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
1147         btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
1148         return 0;
1149 }
1150
1151 static int convert_open_fs(const char *devname,
1152                            struct btrfs_convert_context *cctx)
1153 {
1154         int i;
1155
1156         for (i = 0; i < ARRAY_SIZE(convert_operations); i++) {
1157                 int ret = convert_operations[i]->open_fs(cctx, devname);
1158
1159                 if (ret == 0) {
1160                         cctx->convert_ops = convert_operations[i];
1161                         return ret;
1162                 }
1163         }
1164
1165         error("no file system found to convert");
1166         return -1;
1167 }
1168
1169 static int do_convert(const char *devname, u32 convert_flags, u32 nodesize,
1170                 const char *fslabel, int progress, u64 features)
1171 {
1172         int ret;
1173         int fd = -1;
1174         u32 blocksize;
1175         u64 total_bytes;
1176         struct btrfs_root *root;
1177         struct btrfs_root *image_root;
1178         struct btrfs_convert_context cctx;
1179         struct btrfs_key key;
1180         char subvol_name[SOURCE_FS_NAME_LEN + 8];
1181         struct task_ctx ctx;
1182         char features_buf[64];
1183         struct btrfs_mkfs_config mkfs_cfg;
1184
1185         init_convert_context(&cctx);
1186         ret = convert_open_fs(devname, &cctx);
1187         if (ret)
1188                 goto fail;
1189         ret = convert_check_state(&cctx);
1190         if (ret)
1191                 warning(
1192                 "source filesystem is not clean, running filesystem check is recommended");
1193         ret = convert_read_used_space(&cctx);
1194         if (ret)
1195                 goto fail;
1196
1197         blocksize = cctx.blocksize;
1198         total_bytes = (u64)blocksize * (u64)cctx.block_count;
1199         if (blocksize < 4096) {
1200                 error("block size is too small: %u < 4096", blocksize);
1201                 goto fail;
1202         }
1203         if (btrfs_check_nodesize(nodesize, blocksize, features))
1204                 goto fail;
1205         fd = open(devname, O_RDWR);
1206         if (fd < 0) {
1207                 error("unable to open %s: %s", devname, strerror(errno));
1208                 goto fail;
1209         }
1210         btrfs_parse_features_to_string(features_buf, features);
1211         if (features == BTRFS_MKFS_DEFAULT_FEATURES)
1212                 strcat(features_buf, " (default)");
1213
1214         printf("create btrfs filesystem:\n");
1215         printf("\tblocksize: %u\n", blocksize);
1216         printf("\tnodesize:  %u\n", nodesize);
1217         printf("\tfeatures:  %s\n", features_buf);
1218
1219         memset(&mkfs_cfg, 0, sizeof(mkfs_cfg));
1220         mkfs_cfg.label = cctx.volume_name;
1221         mkfs_cfg.num_bytes = total_bytes;
1222         mkfs_cfg.nodesize = nodesize;
1223         mkfs_cfg.sectorsize = blocksize;
1224         mkfs_cfg.stripesize = blocksize;
1225         mkfs_cfg.features = features;
1226
1227         ret = make_convert_btrfs(fd, &mkfs_cfg, &cctx);
1228         if (ret) {
1229                 error("unable to create initial ctree: %s", strerror(-ret));
1230                 goto fail;
1231         }
1232
1233         root = open_ctree_fd(fd, devname, mkfs_cfg.super_bytenr,
1234                              OPEN_CTREE_WRITES | OPEN_CTREE_FS_PARTIAL);
1235         if (!root) {
1236                 error("unable to open ctree");
1237                 goto fail;
1238         }
1239         ret = init_btrfs(&mkfs_cfg, root, &cctx, convert_flags);
1240         if (ret) {
1241                 error("unable to setup the root tree: %d", ret);
1242                 goto fail;
1243         }
1244
1245         printf("creating %s image file\n", cctx.convert_ops->name);
1246         snprintf(subvol_name, sizeof(subvol_name), "%s_saved",
1247                         cctx.convert_ops->name);
1248         key.objectid = CONV_IMAGE_SUBVOL_OBJECTID;
1249         key.offset = (u64)-1;
1250         key.type = BTRFS_ROOT_ITEM_KEY;
1251         image_root = btrfs_read_fs_root(root->fs_info, &key);
1252         if (!image_root) {
1253                 error("unable to create image subvolume");
1254                 goto fail;
1255         }
1256         ret = create_image(image_root, &mkfs_cfg, &cctx, fd,
1257                               mkfs_cfg.num_bytes, "image",
1258                               convert_flags);
1259         if (ret) {
1260                 error("failed to create %s/image: %d", subvol_name, ret);
1261                 goto fail;
1262         }
1263
1264         printf("creating btrfs metadata");
1265         ctx.max_copy_inodes = (cctx.inodes_count - cctx.free_inodes_count);
1266         ctx.cur_copy_inodes = 0;
1267
1268         if (progress) {
1269                 ctx.info = task_init(print_copied_inodes, after_copied_inodes,
1270                                      &ctx);
1271                 task_start(ctx.info);
1272         }
1273         ret = copy_inodes(&cctx, root, convert_flags, &ctx);
1274         if (ret) {
1275                 error("error during copy_inodes %d", ret);
1276                 goto fail;
1277         }
1278         if (progress) {
1279                 task_stop(ctx.info);
1280                 task_deinit(ctx.info);
1281         }
1282
1283         image_root = link_subvol(root, subvol_name, CONV_IMAGE_SUBVOL_OBJECTID);
1284         if (!image_root) {
1285                 error("unable to link subvolume %s", subvol_name);
1286                 goto fail;
1287         }
1288
1289         memset(root->fs_info->super_copy->label, 0, BTRFS_LABEL_SIZE);
1290         if (convert_flags & CONVERT_FLAG_COPY_LABEL) {
1291                 __strncpy_null(root->fs_info->super_copy->label,
1292                                 cctx.volume_name, BTRFS_LABEL_SIZE - 1);
1293                 printf("copy label '%s'\n", root->fs_info->super_copy->label);
1294         } else if (convert_flags & CONVERT_FLAG_SET_LABEL) {
1295                 strcpy(root->fs_info->super_copy->label, fslabel);
1296                 printf("set label to '%s'\n", fslabel);
1297         }
1298
1299         ret = close_ctree(root);
1300         if (ret) {
1301                 error("close_ctree failed: %d", ret);
1302                 goto fail;
1303         }
1304         convert_close_fs(&cctx);
1305         clean_convert_context(&cctx);
1306
1307         /*
1308          * If this step succeed, we get a mountable btrfs. Otherwise
1309          * the source fs is left unchanged.
1310          */
1311         ret = migrate_super_block(fd, mkfs_cfg.super_bytenr);
1312         if (ret) {
1313                 error("unable to migrate super block: %d", ret);
1314                 goto fail;
1315         }
1316
1317         root = open_ctree_fd(fd, devname, 0,
1318                         OPEN_CTREE_WRITES | OPEN_CTREE_FS_PARTIAL);
1319         if (!root) {
1320                 error("unable to open ctree for finalization");
1321                 goto fail;
1322         }
1323         root->fs_info->finalize_on_close = 1;
1324         close_ctree(root);
1325         close(fd);
1326
1327         printf("conversion complete");
1328         return 0;
1329 fail:
1330         clean_convert_context(&cctx);
1331         if (fd != -1)
1332                 close(fd);
1333         warning(
1334 "an error occurred during conversion, filesystem is partially created but not finalized and not mountable");
1335         return -1;
1336 }
1337
1338 /*
1339  * Check if a non 1:1 mapped chunk can be rolled back.
1340  * For new convert, it's OK while for old convert it's not.
1341  */
1342 static int may_rollback_chunk(struct btrfs_fs_info *fs_info, u64 bytenr)
1343 {
1344         struct btrfs_block_group_cache *bg;
1345         struct btrfs_key key;
1346         struct btrfs_path path;
1347         struct btrfs_root *extent_root = fs_info->extent_root;
1348         u64 bg_start;
1349         u64 bg_end;
1350         int ret;
1351
1352         bg = btrfs_lookup_first_block_group(fs_info, bytenr);
1353         if (!bg)
1354                 return -ENOENT;
1355         bg_start = bg->key.objectid;
1356         bg_end = bg->key.objectid + bg->key.offset;
1357
1358         key.objectid = bg_end;
1359         key.type = BTRFS_METADATA_ITEM_KEY;
1360         key.offset = 0;
1361         btrfs_init_path(&path);
1362
1363         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
1364         if (ret < 0)
1365                 return ret;
1366
1367         while (1) {
1368                 struct btrfs_extent_item *ei;
1369
1370                 ret = btrfs_previous_extent_item(extent_root, &path, bg_start);
1371                 if (ret > 0) {
1372                         ret = 0;
1373                         break;
1374                 }
1375                 if (ret < 0)
1376                         break;
1377
1378                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
1379                 if (key.type == BTRFS_METADATA_ITEM_KEY)
1380                         continue;
1381                 /* Now it's EXTENT_ITEM_KEY only */
1382                 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
1383                                     struct btrfs_extent_item);
1384                 /*
1385                  * Found data extent, means this is old convert must follow 1:1
1386                  * mapping.
1387                  */
1388                 if (btrfs_extent_flags(path.nodes[0], ei)
1389                                 & BTRFS_EXTENT_FLAG_DATA) {
1390                         ret = -EINVAL;
1391                         break;
1392                 }
1393         }
1394         btrfs_release_path(&path);
1395         return ret;
1396 }
1397
1398 static int may_rollback(struct btrfs_root *root)
1399 {
1400         struct btrfs_fs_info *info = root->fs_info;
1401         struct btrfs_multi_bio *multi = NULL;
1402         u64 bytenr;
1403         u64 length;
1404         u64 physical;
1405         u64 total_bytes;
1406         int num_stripes;
1407         int ret;
1408
1409         if (btrfs_super_num_devices(info->super_copy) != 1)
1410                 goto fail;
1411
1412         bytenr = BTRFS_SUPER_INFO_OFFSET;
1413         total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
1414
1415         while (1) {
1416                 ret = btrfs_map_block(&info->mapping_tree, WRITE, bytenr,
1417                                       &length, &multi, 0, NULL);
1418                 if (ret) {
1419                         if (ret == -ENOENT) {
1420                                 /* removed block group at the tail */
1421                                 if (length == (u64)-1)
1422                                         break;
1423
1424                                 /* removed block group in the middle */
1425                                 goto next;
1426                         }
1427                         goto fail;
1428                 }
1429
1430                 num_stripes = multi->num_stripes;
1431                 physical = multi->stripes[0].physical;
1432                 free(multi);
1433
1434                 if (num_stripes != 1) {
1435                         error("num stripes for bytenr %llu is not 1", bytenr);
1436                         goto fail;
1437                 }
1438
1439                 /*
1440                  * Extra check for new convert, as metadata chunk from new
1441                  * convert is much more free than old convert, it doesn't need
1442                  * to do 1:1 mapping.
1443                  */
1444                 if (physical != bytenr) {
1445                         /*
1446                          * Check if it's a metadata chunk and has only metadata
1447                          * extent.
1448                          */
1449                         ret = may_rollback_chunk(info, bytenr);
1450                         if (ret < 0)
1451                                 goto fail;
1452                 }
1453 next:
1454                 bytenr += length;
1455                 if (bytenr >= total_bytes)
1456                         break;
1457         }
1458         return 0;
1459 fail:
1460         return -1;
1461 }
1462
1463 static int do_rollback(const char *devname)
1464 {
1465         int fd = -1;
1466         int ret;
1467         int i;
1468         struct btrfs_root *root;
1469         struct btrfs_root *image_root;
1470         struct btrfs_root *chunk_root;
1471         struct btrfs_dir_item *dir;
1472         struct btrfs_inode_item *inode;
1473         struct btrfs_file_extent_item *fi;
1474         struct btrfs_trans_handle *trans;
1475         struct extent_buffer *leaf;
1476         struct btrfs_block_group_cache *cache1;
1477         struct btrfs_block_group_cache *cache2;
1478         struct btrfs_key key;
1479         struct btrfs_path path;
1480         struct extent_io_tree io_tree;
1481         char *buf = NULL;
1482         char *name;
1483         u64 bytenr;
1484         u64 num_bytes;
1485         u64 root_dir;
1486         u64 objectid;
1487         u64 offset;
1488         u64 start;
1489         u64 end;
1490         u64 sb_bytenr;
1491         u64 first_free;
1492         u64 total_bytes;
1493         u32 sectorsize;
1494
1495         extent_io_tree_init(&io_tree);
1496
1497         fd = open(devname, O_RDWR);
1498         if (fd < 0) {
1499                 error("unable to open %s: %s", devname, strerror(errno));
1500                 goto fail;
1501         }
1502         root = open_ctree_fd(fd, devname, 0, OPEN_CTREE_WRITES);
1503         if (!root) {
1504                 error("unable to open ctree");
1505                 goto fail;
1506         }
1507         ret = may_rollback(root);
1508         if (ret < 0) {
1509                 error("unable to do rollback: %d", ret);
1510                 goto fail;
1511         }
1512
1513         sectorsize = root->sectorsize;
1514         buf = malloc(sectorsize);
1515         if (!buf) {
1516                 error("unable to allocate memory");
1517                 goto fail;
1518         }
1519
1520         btrfs_init_path(&path);
1521
1522         key.objectid = CONV_IMAGE_SUBVOL_OBJECTID;
1523         key.type = BTRFS_ROOT_BACKREF_KEY;
1524         key.offset = BTRFS_FS_TREE_OBJECTID;
1525         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path, 0,
1526                                 0);
1527         btrfs_release_path(&path);
1528         if (ret > 0) {
1529                 error("unable to convert ext2 image subvolume, is it deleted?");
1530                 goto fail;
1531         } else if (ret < 0) {
1532                 error("unable to open ext2_saved, id %llu: %s",
1533                         (unsigned long long)key.objectid, strerror(-ret));
1534                 goto fail;
1535         }
1536
1537         key.objectid = CONV_IMAGE_SUBVOL_OBJECTID;
1538         key.type = BTRFS_ROOT_ITEM_KEY;
1539         key.offset = (u64)-1;
1540         image_root = btrfs_read_fs_root(root->fs_info, &key);
1541         if (!image_root || IS_ERR(image_root)) {
1542                 error("unable to open subvolume %llu: %ld",
1543                         (unsigned long long)key.objectid, PTR_ERR(image_root));
1544                 goto fail;
1545         }
1546
1547         name = "image";
1548         root_dir = btrfs_root_dirid(&root->root_item);
1549         dir = btrfs_lookup_dir_item(NULL, image_root, &path,
1550                                    root_dir, name, strlen(name), 0);
1551         if (!dir || IS_ERR(dir)) {
1552                 error("unable to find file %s: %ld", name, PTR_ERR(dir));
1553                 goto fail;
1554         }
1555         leaf = path.nodes[0];
1556         btrfs_dir_item_key_to_cpu(leaf, dir, &key);
1557         btrfs_release_path(&path);
1558
1559         objectid = key.objectid;
1560
1561         ret = btrfs_lookup_inode(NULL, image_root, &path, &key, 0);
1562         if (ret) {
1563                 error("unable to find inode item: %d", ret);
1564                 goto fail;
1565         }
1566         leaf = path.nodes[0];
1567         inode = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_inode_item);
1568         total_bytes = btrfs_inode_size(leaf, inode);
1569         btrfs_release_path(&path);
1570
1571         key.objectid = objectid;
1572         key.offset = 0;
1573         key.type = BTRFS_EXTENT_DATA_KEY;
1574         ret = btrfs_search_slot(NULL, image_root, &key, &path, 0, 0);
1575         if (ret != 0) {
1576                 error("unable to find first file extent: %d", ret);
1577                 btrfs_release_path(&path);
1578                 goto fail;
1579         }
1580
1581         /* build mapping tree for the relocated blocks */
1582         for (offset = 0; offset < total_bytes; ) {
1583                 leaf = path.nodes[0];
1584                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1585                         ret = btrfs_next_leaf(root, &path);
1586                         if (ret != 0)
1587                                 break;  
1588                         continue;
1589                 }
1590
1591                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1592                 if (key.objectid != objectid || key.offset != offset ||
1593                     key.type != BTRFS_EXTENT_DATA_KEY)
1594                         break;
1595
1596                 fi = btrfs_item_ptr(leaf, path.slots[0],
1597                                     struct btrfs_file_extent_item);
1598                 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
1599                         break;
1600                 if (btrfs_file_extent_compression(leaf, fi) ||
1601                     btrfs_file_extent_encryption(leaf, fi) ||
1602                     btrfs_file_extent_other_encoding(leaf, fi))
1603                         break;
1604
1605                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1606                 /* skip holes and direct mapped extents */
1607                 if (bytenr == 0 || bytenr == offset)
1608                         goto next_extent;
1609
1610                 bytenr += btrfs_file_extent_offset(leaf, fi);
1611                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
1612
1613                 cache1 = btrfs_lookup_block_group(root->fs_info, offset);
1614                 cache2 = btrfs_lookup_block_group(root->fs_info,
1615                                                   offset + num_bytes - 1);
1616                 /*
1617                  * Here we must take both the old and the new convert
1618                  * behavior into consideration.
1619                  * In the old convert case there is no consistent chunk type
1620                  * that covers the extent: META/DATA/SYS are all possible.
1621                  * Just ensure the relocated one is in a SYS chunk.
1622                  * In the new convert case they are all covered by DATA chunks.
1623                  *
1624                  * So, there is no valid chunk type check for it now.
1625                  */
1626                 if (cache1 != cache2)
1627                         break;
1628
1629                 set_extent_bits(&io_tree, offset, offset + num_bytes - 1,
1630                                 EXTENT_LOCKED, GFP_NOFS);
1631                 set_state_private(&io_tree, offset, bytenr);
1632 next_extent:
1633                 offset += btrfs_file_extent_num_bytes(leaf, fi);
1634                 path.slots[0]++;
1635         }
1636         btrfs_release_path(&path);
1637
1638         if (offset < total_bytes) {
1639                 error("unable to build extent mapping (offset %llu, total_bytes %llu)",
1640                                 (unsigned long long)offset,
1641                                 (unsigned long long)total_bytes);
1642                 error("converted filesystem after balance is unable to rollback");
1643                 goto fail;
1644         }
1645
1646         first_free = BTRFS_SUPER_INFO_OFFSET + 2 * sectorsize - 1;
1647         first_free &= ~((u64)sectorsize - 1);
1648         /* backup for extent #0 should exist */
1649         if(!test_range_bit(&io_tree, 0, first_free - 1, EXTENT_LOCKED, 1)) {
1650                 error("no backup for the first extent");
1651                 goto fail;
1652         }
1653         /* force no allocation from system block group */
1654         root->fs_info->system_allocs = -1;
1655         trans = btrfs_start_transaction(root, 1);
1656         if (!trans) {
1657                 error("unable to start transaction");
1658                 goto fail;
1659         }
1660         /*
1661          * recow the whole chunk tree, this will remove all chunk tree blocks
1662          * from system block group
1663          */
1664         chunk_root = root->fs_info->chunk_root;
1665         memset(&key, 0, sizeof(key));
1666         while (1) {
1667                 ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 1);
1668                 if (ret < 0)
1669                         break;
1670
1671                 ret = btrfs_next_leaf(chunk_root, &path);
1672                 if (ret)
1673                         break;
1674
1675                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
1676                 btrfs_release_path(&path);
1677         }
1678         btrfs_release_path(&path);
1679
1680         offset = 0;
1681         num_bytes = 0;
1682         while(1) {
1683                 cache1 = btrfs_lookup_block_group(root->fs_info, offset);
1684                 if (!cache1)
1685                         break;
1686
1687                 if (cache1->flags & BTRFS_BLOCK_GROUP_SYSTEM)
1688                         num_bytes += btrfs_block_group_used(&cache1->item);
1689
1690                 offset = cache1->key.objectid + cache1->key.offset;
1691         }
1692         /* only extent #0 left in system block group? */
1693         if (num_bytes > first_free) {
1694                 error(
1695         "unable to empty system block group (num_bytes %llu, first_free %llu",
1696                                 (unsigned long long)num_bytes,
1697                                 (unsigned long long)first_free);
1698                 goto fail;
1699         }
1700         /* create a system chunk that maps the whole device */
1701         ret = prepare_system_chunk_sb(root->fs_info->super_copy);
1702         if (ret) {
1703                 error("unable to update system chunk: %d", ret);
1704                 goto fail;
1705         }
1706
1707         ret = btrfs_commit_transaction(trans, root);
1708         if (ret) {
1709                 error("transaction commit failed: %d", ret);
1710                 goto fail;
1711         }
1712
1713         ret = close_ctree(root);
1714         if (ret) {
1715                 error("close_ctree failed: %d", ret);
1716                 goto fail;
1717         }
1718
1719         /* zero btrfs super block mirrors */
1720         memset(buf, 0, sectorsize);
1721         for (i = 1 ; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1722                 bytenr = btrfs_sb_offset(i);
1723                 if (bytenr >= total_bytes)
1724                         break;
1725                 ret = pwrite(fd, buf, sectorsize, bytenr);
1726                 if (ret != sectorsize) {
1727                         error("zeroing superblock mirror %d failed: %d",
1728                                         i, ret);
1729                         goto fail;
1730                 }
1731         }
1732
1733         sb_bytenr = (u64)-1;
1734         /* copy all relocated blocks back */
1735         while(1) {
1736                 ret = find_first_extent_bit(&io_tree, 0, &start, &end,
1737                                             EXTENT_LOCKED);
1738                 if (ret)
1739                         break;
1740
1741                 ret = get_state_private(&io_tree, start, &bytenr);
1742                 BUG_ON(ret);
1743
1744                 clear_extent_bits(&io_tree, start, end, EXTENT_LOCKED,
1745                                   GFP_NOFS);
1746
1747                 while (start <= end) {
1748                         if (start == BTRFS_SUPER_INFO_OFFSET) {
1749                                 sb_bytenr = bytenr;
1750                                 goto next_sector;
1751                         }
1752                         ret = pread(fd, buf, sectorsize, bytenr);
1753                         if (ret < 0) {
1754                                 error("reading superblock at %llu failed: %d",
1755                                                 (unsigned long long)bytenr, ret);
1756                                 goto fail;
1757                         }
1758                         BUG_ON(ret != sectorsize);
1759                         ret = pwrite(fd, buf, sectorsize, start);
1760                         if (ret < 0) {
1761                                 error("writing superblock at %llu failed: %d",
1762                                                 (unsigned long long)start, ret);
1763                                 goto fail;
1764                         }
1765                         BUG_ON(ret != sectorsize);
1766 next_sector:
1767                         start += sectorsize;
1768                         bytenr += sectorsize;
1769                 }
1770         }
1771
1772         ret = fsync(fd);
1773         if (ret < 0) {
1774                 error("fsync failed: %s", strerror(errno));
1775                 goto fail;
1776         }
1777         /*
1778          * finally, overwrite btrfs super block.
1779          */
1780         ret = pread(fd, buf, sectorsize, sb_bytenr);
1781         if (ret < 0) {
1782                 error("reading primary superblock failed: %s",
1783                                 strerror(errno));
1784                 goto fail;
1785         }
1786         BUG_ON(ret != sectorsize);
1787         ret = pwrite(fd, buf, sectorsize, BTRFS_SUPER_INFO_OFFSET);
1788         if (ret < 0) {
1789                 error("writing primary superblock failed: %s",
1790                                 strerror(errno));
1791                 goto fail;
1792         }
1793         BUG_ON(ret != sectorsize);
1794         ret = fsync(fd);
1795         if (ret < 0) {
1796                 error("fsync failed: %s", strerror(errno));
1797                 goto fail;
1798         }
1799
1800         close(fd);
1801         free(buf);
1802         extent_io_tree_cleanup(&io_tree);
1803         printf("rollback complete\n");
1804         return 0;
1805
1806 fail:
1807         if (fd != -1)
1808                 close(fd);
1809         free(buf);
1810         error("rollback aborted");
1811         return -1;
1812 }
1813
1814 static void print_usage(void)
1815 {
1816         printf("usage: btrfs-convert [options] device\n");
1817         printf("options:\n");
1818         printf("\t-d|--no-datasum        disable data checksum, sets NODATASUM\n");
1819         printf("\t-i|--no-xattr          ignore xattrs and ACLs\n");
1820         printf("\t-n|--no-inline         disable inlining of small files to metadata\n");
1821         printf("\t-N|--nodesize SIZE     set filesystem metadata nodesize\n");
1822         printf("\t-r|--rollback          roll back to the original filesystem\n");
1823         printf("\t-l|--label LABEL       set filesystem label\n");
1824         printf("\t-L|--copy-label        use label from converted filesystem\n");
1825         printf("\t-p|--progress          show converting progress (default)\n");
1826         printf("\t-O|--features LIST     comma separated list of filesystem features\n");
1827         printf("\t--no-progress          show only overview, not the detailed progress\n");
1828         printf("\n");
1829         printf("Supported filesystems:\n");
1830         printf("\text2/3/4: %s\n", BTRFSCONVERT_EXT2 ? "yes" : "no");
1831 }
1832
1833 int main(int argc, char *argv[])
1834 {
1835         int ret;
1836         int packing = 1;
1837         int noxattr = 0;
1838         int datacsum = 1;
1839         u32 nodesize = max_t(u32, sysconf(_SC_PAGESIZE),
1840                         BTRFS_MKFS_DEFAULT_NODE_SIZE);
1841         int rollback = 0;
1842         int copylabel = 0;
1843         int usage_error = 0;
1844         int progress = 1;
1845         char *file;
1846         char fslabel[BTRFS_LABEL_SIZE];
1847         u64 features = BTRFS_MKFS_DEFAULT_FEATURES;
1848
1849         while(1) {
1850                 enum { GETOPT_VAL_NO_PROGRESS = 256 };
1851                 static const struct option long_options[] = {
1852                         { "no-progress", no_argument, NULL,
1853                                 GETOPT_VAL_NO_PROGRESS },
1854                         { "no-datasum", no_argument, NULL, 'd' },
1855                         { "no-inline", no_argument, NULL, 'n' },
1856                         { "no-xattr", no_argument, NULL, 'i' },
1857                         { "rollback", no_argument, NULL, 'r' },
1858                         { "features", required_argument, NULL, 'O' },
1859                         { "progress", no_argument, NULL, 'p' },
1860                         { "label", required_argument, NULL, 'l' },
1861                         { "copy-label", no_argument, NULL, 'L' },
1862                         { "nodesize", required_argument, NULL, 'N' },
1863                         { "help", no_argument, NULL, GETOPT_VAL_HELP},
1864                         { NULL, 0, NULL, 0 }
1865                 };
1866                 int c = getopt_long(argc, argv, "dinN:rl:LpO:", long_options, NULL);
1867
1868                 if (c < 0)
1869                         break;
1870                 switch(c) {
1871                         case 'd':
1872                                 datacsum = 0;
1873                                 break;
1874                         case 'i':
1875                                 noxattr = 1;
1876                                 break;
1877                         case 'n':
1878                                 packing = 0;
1879                                 break;
1880                         case 'N':
1881                                 nodesize = parse_size(optarg);
1882                                 break;
1883                         case 'r':
1884                                 rollback = 1;
1885                                 break;
1886                         case 'l':
1887                                 copylabel = CONVERT_FLAG_SET_LABEL;
1888                                 if (strlen(optarg) >= BTRFS_LABEL_SIZE) {
1889                                         warning(
1890                                         "label too long, trimmed to %d bytes",
1891                                                 BTRFS_LABEL_SIZE - 1);
1892                                 }
1893                                 __strncpy_null(fslabel, optarg, BTRFS_LABEL_SIZE - 1);
1894                                 break;
1895                         case 'L':
1896                                 copylabel = CONVERT_FLAG_COPY_LABEL;
1897                                 break;
1898                         case 'p':
1899                                 progress = 1;
1900                                 break;
1901                         case 'O': {
1902                                 char *orig = strdup(optarg);
1903                                 char *tmp = orig;
1904
1905                                 tmp = btrfs_parse_fs_features(tmp, &features);
1906                                 if (tmp) {
1907                                         error("unrecognized filesystem feature: %s",
1908                                                         tmp);
1909                                         free(orig);
1910                                         exit(1);
1911                                 }
1912                                 free(orig);
1913                                 if (features & BTRFS_FEATURE_LIST_ALL) {
1914                                         btrfs_list_all_fs_features(
1915                                                 ~BTRFS_CONVERT_ALLOWED_FEATURES);
1916                                         exit(0);
1917                                 }
1918                                 if (features & ~BTRFS_CONVERT_ALLOWED_FEATURES) {
1919                                         char buf[64];
1920
1921                                         btrfs_parse_features_to_string(buf,
1922                                                 features & ~BTRFS_CONVERT_ALLOWED_FEATURES);
1923                                         error("features not allowed for convert: %s",
1924                                                 buf);
1925                                         exit(1);
1926                                 }
1927
1928                                 break;
1929                                 }
1930                         case GETOPT_VAL_NO_PROGRESS:
1931                                 progress = 0;
1932                                 break;
1933                         case GETOPT_VAL_HELP:
1934                         default:
1935                                 print_usage();
1936                                 return c != GETOPT_VAL_HELP;
1937                 }
1938         }
1939         set_argv0(argv);
1940         if (check_argc_exact(argc - optind, 1)) {
1941                 print_usage();
1942                 return 1;
1943         }
1944
1945         if (rollback && (!datacsum || noxattr || !packing)) {
1946                 fprintf(stderr,
1947                         "Usage error: -d, -i, -n options do not apply to rollback\n");
1948                 usage_error++;
1949         }
1950
1951         if (usage_error) {
1952                 print_usage();
1953                 return 1;
1954         }
1955
1956         file = argv[optind];
1957         ret = check_mounted(file);
1958         if (ret < 0) {
1959                 error("could not check mount status: %s", strerror(-ret));
1960                 return 1;
1961         } else if (ret) {
1962                 error("%s is mounted", file);
1963                 return 1;
1964         }
1965
1966         if (rollback) {
1967                 ret = do_rollback(file);
1968         } else {
1969                 u32 cf = 0;
1970
1971                 cf |= datacsum ? CONVERT_FLAG_DATACSUM : 0;
1972                 cf |= packing ? CONVERT_FLAG_INLINE_DATA : 0;
1973                 cf |= noxattr ? 0 : CONVERT_FLAG_XATTR;
1974                 cf |= copylabel;
1975                 ret = do_convert(file, cf, nodesize, fslabel, progress, features);
1976         }
1977         if (ret)
1978                 return 1;
1979         return 0;
1980 }