btrfs-progs: convert: remove unused includes
[platform/upstream/btrfs-progs.git] / convert / main.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include "kerncompat.h"
20
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <sys/types.h>
24 #include <fcntl.h>
25 #include <unistd.h>
26 #include <getopt.h>
27
28 #include "ctree.h"
29 #include "disk-io.h"
30 #include "volumes.h"
31 #include "transaction.h"
32 #include "utils.h"
33 #include "task-utils.h"
34 #include "help.h"
35 #include "mkfs/common.h"
36 #include "convert/common.h"
37 #include "convert/source-fs.h"
38 #include "fsfeatures.h"
39
/*
 * Declared extern so that this file only references the object. Without the
 * storage class this would be a tentative definition of a const object and
 * could clash at link time with the real definition.
 * NOTE(review): the definition is expected to live in the ext2 source-fs
 * backend - confirm it is built whenever BTRFSCONVERT_EXT2 is set.
 */
extern const struct btrfs_convert_operations ext2_convert_ops;

/* Source filesystem backends selected at build time. */
static const struct btrfs_convert_operations *convert_operations[] = {
#if BTRFSCONVERT_EXT2
	&ext2_convert_ops,
#endif
};
47
48 static void *print_copied_inodes(void *p)
49 {
50         struct task_ctx *priv = p;
51         const char work_indicator[] = { '.', 'o', 'O', 'o' };
52         u64 count = 0;
53
54         task_period_start(priv->info, 1000 /* 1s */);
55         while (1) {
56                 count++;
57                 printf("copy inodes [%c] [%10llu/%10llu]\r",
58                        work_indicator[count % 4],
59                        (unsigned long long)priv->cur_copy_inodes,
60                        (unsigned long long)priv->max_copy_inodes);
61                 fflush(stdout);
62                 task_period_wait(priv->info);
63         }
64
65         return NULL;
66 }
67
/*
 * Emit a newline and flush stdout, ending the '\r'-terminated progress line.
 * @p is unused. Always returns 0.
 */
static int after_copied_inodes(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
75
76 static inline int copy_inodes(struct btrfs_convert_context *cctx,
77                               struct btrfs_root *root, int datacsum,
78                               int packing, int noxattr, struct task_ctx *p)
79 {
80         return cctx->convert_ops->copy_inodes(cctx, root, datacsum, packing,
81                                              noxattr, p);
82 }
83
84 static inline void convert_close_fs(struct btrfs_convert_context *cctx)
85 {
86         cctx->convert_ops->close_fs(cctx);
87 }
88
89 static inline int convert_check_state(struct btrfs_convert_context *cctx)
90 {
91         return cctx->convert_ops->check_state(cctx);
92 }
93
94 static int csum_disk_extent(struct btrfs_trans_handle *trans,
95                             struct btrfs_root *root,
96                             u64 disk_bytenr, u64 num_bytes)
97 {
98         u32 blocksize = root->sectorsize;
99         u64 offset;
100         char *buffer;
101         int ret = 0;
102
103         buffer = malloc(blocksize);
104         if (!buffer)
105                 return -ENOMEM;
106         for (offset = 0; offset < num_bytes; offset += blocksize) {
107                 ret = read_disk_extent(root, disk_bytenr + offset,
108                                         blocksize, buffer);
109                 if (ret)
110                         break;
111                 ret = btrfs_csum_file_block(trans,
112                                             root->fs_info->csum_root,
113                                             disk_bytenr + num_bytes,
114                                             disk_bytenr + offset,
115                                             buffer, blocksize);
116                 if (ret)
117                         break;
118         }
119         free(buffer);
120         return ret;
121 }
122
123 static int create_image_file_range(struct btrfs_trans_handle *trans,
124                                       struct btrfs_root *root,
125                                       struct cache_tree *used,
126                                       struct btrfs_inode_item *inode,
127                                       u64 ino, u64 bytenr, u64 *ret_len,
128                                       int datacsum)
129 {
130         struct cache_extent *cache;
131         struct btrfs_block_group_cache *bg_cache;
132         u64 len = *ret_len;
133         u64 disk_bytenr;
134         int i;
135         int ret;
136
137         if (bytenr != round_down(bytenr, root->sectorsize)) {
138                 error("bytenr not sectorsize aligned: %llu",
139                                 (unsigned long long)bytenr);
140                 return -EINVAL;
141         }
142         if (len != round_down(len, root->sectorsize)) {
143                 error("length not sectorsize aligned: %llu",
144                                 (unsigned long long)len);
145                 return -EINVAL;
146         }
147         len = min_t(u64, len, BTRFS_MAX_EXTENT_SIZE);
148
149         /*
150          * Skip sb ranges first
151          * [0, 1M), [sb_offset(1), +64K), [sb_offset(2), +64K].
152          *
153          * Or we will insert a hole into current image file, and later
154          * migrate block will fail as there is already a file extent.
155          */
156         if (bytenr < 1024 * 1024) {
157                 *ret_len = 1024 * 1024 - bytenr;
158                 return 0;
159         }
160         for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
161                 u64 cur = btrfs_sb_offset(i);
162
163                 if (bytenr >= cur && bytenr < cur + BTRFS_STRIPE_LEN) {
164                         *ret_len = cur + BTRFS_STRIPE_LEN - bytenr;
165                         return 0;
166                 }
167         }
168         for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
169                 u64 cur = btrfs_sb_offset(i);
170
171                 /*
172                  *      |--reserved--|
173                  * |----range-------|
174                  * May still need to go through file extent inserts
175                  */
176                 if (bytenr < cur && bytenr + len >= cur) {
177                         len = min_t(u64, len, cur - bytenr);
178                         break;
179                 }
180                 /*
181                  * |--reserved--|
182                  *      |---range---|
183                  * Drop out, no need to insert anything
184                  */
185                 if (bytenr >= cur && bytenr < cur + BTRFS_STRIPE_LEN) {
186                         *ret_len = cur + BTRFS_STRIPE_LEN - bytenr;
187                         return 0;
188                 }
189         }
190
191         cache = search_cache_extent(used, bytenr);
192         if (cache) {
193                 if (cache->start <= bytenr) {
194                         /*
195                          * |///////Used///////|
196                          *      |<--insert--->|
197                          *      bytenr
198                          */
199                         len = min_t(u64, len, cache->start + cache->size -
200                                     bytenr);
201                         disk_bytenr = bytenr;
202                 } else {
203                         /*
204                          *              |//Used//|
205                          *  |<-insert-->|
206                          *  bytenr
207                          */
208                         len = min(len, cache->start - bytenr);
209                         disk_bytenr = 0;
210                         datacsum = 0;
211                 }
212         } else {
213                 /*
214                  * |//Used//|           |EOF
215                  *          |<-insert-->|
216                  *          bytenr
217                  */
218                 disk_bytenr = 0;
219                 datacsum = 0;
220         }
221
222         if (disk_bytenr) {
223                 /* Check if the range is in a data block group */
224                 bg_cache = btrfs_lookup_block_group(root->fs_info, bytenr);
225                 if (!bg_cache)
226                         return -ENOENT;
227                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
228                         return -EINVAL;
229
230                 /* The extent should never cross block group boundary */
231                 len = min_t(u64, len, bg_cache->key.objectid +
232                             bg_cache->key.offset - bytenr);
233         }
234
235         if (len != round_down(len, root->sectorsize)) {
236                 error("remaining length not sectorsize aligned: %llu",
237                                 (unsigned long long)len);
238                 return -EINVAL;
239         }
240         ret = btrfs_record_file_extent(trans, root, ino, inode, bytenr,
241                                        disk_bytenr, len);
242         if (ret < 0)
243                 return ret;
244
245         if (datacsum)
246                 ret = csum_disk_extent(trans, root, bytenr, len);
247         *ret_len = len;
248         return ret;
249 }
250
251 /*
252  * Relocate old fs data in one reserved ranges
253  *
254  * Since all old fs data in reserved range is not covered by any chunk nor
255  * data extent, we don't need to handle any reference but add new
256  * extent/reference, which makes codes more clear
257  */
258 static int migrate_one_reserved_range(struct btrfs_trans_handle *trans,
259                                       struct btrfs_root *root,
260                                       struct cache_tree *used,
261                                       struct btrfs_inode_item *inode, int fd,
262                                       u64 ino, u64 start, u64 len, int datacsum)
263 {
264         u64 cur_off = start;
265         u64 cur_len = len;
266         u64 hole_start = start;
267         u64 hole_len;
268         struct cache_extent *cache;
269         struct btrfs_key key;
270         struct extent_buffer *eb;
271         int ret = 0;
272
273         while (cur_off < start + len) {
274                 cache = lookup_cache_extent(used, cur_off, cur_len);
275                 if (!cache)
276                         break;
277                 cur_off = max(cache->start, cur_off);
278                 cur_len = min(cache->start + cache->size, start + len) -
279                           cur_off;
280                 BUG_ON(cur_len < root->sectorsize);
281
282                 /* reserve extent for the data */
283                 ret = btrfs_reserve_extent(trans, root, cur_len, 0, 0, (u64)-1,
284                                            &key, 1);
285                 if (ret < 0)
286                         break;
287
288                 eb = malloc(sizeof(*eb) + cur_len);
289                 if (!eb) {
290                         ret = -ENOMEM;
291                         break;
292                 }
293
294                 ret = pread(fd, eb->data, cur_len, cur_off);
295                 if (ret < cur_len) {
296                         ret = (ret < 0 ? ret : -EIO);
297                         free(eb);
298                         break;
299                 }
300                 eb->start = key.objectid;
301                 eb->len = key.offset;
302
303                 /* Write the data */
304                 ret = write_and_map_eb(trans, root, eb);
305                 free(eb);
306                 if (ret < 0)
307                         break;
308
309                 /* Now handle extent item and file extent things */
310                 ret = btrfs_record_file_extent(trans, root, ino, inode, cur_off,
311                                                key.objectid, key.offset);
312                 if (ret < 0)
313                         break;
314                 /* Finally, insert csum items */
315                 if (datacsum)
316                         ret = csum_disk_extent(trans, root, key.objectid,
317                                                key.offset);
318
319                 /* Don't forget to insert hole */
320                 hole_len = cur_off - hole_start;
321                 if (hole_len) {
322                         ret = btrfs_record_file_extent(trans, root, ino, inode,
323                                         hole_start, 0, hole_len);
324                         if (ret < 0)
325                                 break;
326                 }
327
328                 cur_off += key.offset;
329                 hole_start = cur_off;
330                 cur_len = start + len - cur_off;
331         }
332         /* Last hole */
333         if (start + len - hole_start > 0)
334                 ret = btrfs_record_file_extent(trans, root, ino, inode,
335                                 hole_start, 0, start + len - hole_start);
336         return ret;
337 }
338
339 /*
340  * Relocate the used ext2 data in reserved ranges
341  * [0,1M)
342  * [btrfs_sb_offset(1), +BTRFS_STRIPE_LEN)
343  * [btrfs_sb_offset(2), +BTRFS_STRIPE_LEN)
344  */
345 static int migrate_reserved_ranges(struct btrfs_trans_handle *trans,
346                                    struct btrfs_root *root,
347                                    struct cache_tree *used,
348                                    struct btrfs_inode_item *inode, int fd,
349                                    u64 ino, u64 total_bytes, int datacsum)
350 {
351         u64 cur_off;
352         u64 cur_len;
353         int ret = 0;
354
355         /* 0 ~ 1M */
356         cur_off = 0;
357         cur_len = 1024 * 1024;
358         ret = migrate_one_reserved_range(trans, root, used, inode, fd, ino,
359                                          cur_off, cur_len, datacsum);
360         if (ret < 0)
361                 return ret;
362
363         /* second sb(fisrt sb is included in 0~1M) */
364         cur_off = btrfs_sb_offset(1);
365         cur_len = min(total_bytes, cur_off + BTRFS_STRIPE_LEN) - cur_off;
366         if (cur_off > total_bytes)
367                 return ret;
368         ret = migrate_one_reserved_range(trans, root, used, inode, fd, ino,
369                                          cur_off, cur_len, datacsum);
370         if (ret < 0)
371                 return ret;
372
373         /* Last sb */
374         cur_off = btrfs_sb_offset(2);
375         cur_len = min(total_bytes, cur_off + BTRFS_STRIPE_LEN) - cur_off;
376         if (cur_off > total_bytes)
377                 return ret;
378         ret = migrate_one_reserved_range(trans, root, used, inode, fd, ino,
379                                          cur_off, cur_len, datacsum);
380         return ret;
381 }
382
383 /*
384  * Helper for expand and merge extent_cache for wipe_one_reserved_range() to
385  * handle wiping a range that exists in cache.
386  */
387 static int _expand_extent_cache(struct cache_tree *tree,
388                                 struct cache_extent *entry,
389                                 u64 min_stripe_size, int backward)
390 {
391         struct cache_extent *ce;
392         int diff;
393
394         if (entry->size >= min_stripe_size)
395                 return 0;
396         diff = min_stripe_size - entry->size;
397
398         if (backward) {
399                 ce = prev_cache_extent(entry);
400                 if (!ce)
401                         goto expand_back;
402                 if (ce->start + ce->size >= entry->start - diff) {
403                         /* Directly merge with previous extent */
404                         ce->size = entry->start + entry->size - ce->start;
405                         remove_cache_extent(tree, entry);
406                         free(entry);
407                         return 0;
408                 }
409 expand_back:
410                 /* No overlap, normal extent */
411                 if (entry->start < diff) {
412                         error("cannot find space for data chunk layout");
413                         return -ENOSPC;
414                 }
415                 entry->start -= diff;
416                 entry->size += diff;
417                 return 0;
418         }
419         ce = next_cache_extent(entry);
420         if (!ce)
421                 goto expand_after;
422         if (entry->start + entry->size + diff >= ce->start) {
423                 /* Directly merge with next extent */
424                 entry->size = ce->start + ce->size - entry->start;
425                 remove_cache_extent(tree, ce);
426                 free(ce);
427                 return 0;
428         }
429 expand_after:
430         entry->size += diff;
431         return 0;
432 }
433
434 /*
435  * Remove one reserve range from given cache tree
436  * if min_stripe_size is non-zero, it will ensure for split case,
437  * all its split cache extent is no smaller than @min_strip_size / 2.
438  */
439 static int wipe_one_reserved_range(struct cache_tree *tree,
440                                    u64 start, u64 len, u64 min_stripe_size,
441                                    int ensure_size)
442 {
443         struct cache_extent *cache;
444         int ret;
445
446         BUG_ON(ensure_size && min_stripe_size == 0);
447         /*
448          * The logical here is simplified to handle special cases only
449          * So we don't need to consider merge case for ensure_size
450          */
451         BUG_ON(min_stripe_size && (min_stripe_size < len * 2 ||
452                min_stripe_size / 2 < BTRFS_STRIPE_LEN));
453
454         /* Also, wipe range should already be aligned */
455         BUG_ON(start != round_down(start, BTRFS_STRIPE_LEN) ||
456                start + len != round_up(start + len, BTRFS_STRIPE_LEN));
457
458         min_stripe_size /= 2;
459
460         cache = lookup_cache_extent(tree, start, len);
461         if (!cache)
462                 return 0;
463
464         if (start <= cache->start) {
465                 /*
466                  *      |--------cache---------|
467                  * |-wipe-|
468                  */
469                 BUG_ON(start + len <= cache->start);
470
471                 /*
472                  * The wipe size is smaller than min_stripe_size / 2,
473                  * so the result length should still meet min_stripe_size
474                  * And no need to do alignment
475                  */
476                 cache->size -= (start + len - cache->start);
477                 if (cache->size == 0) {
478                         remove_cache_extent(tree, cache);
479                         free(cache);
480                         return 0;
481                 }
482
483                 BUG_ON(ensure_size && cache->size < min_stripe_size);
484
485                 cache->start = start + len;
486                 return 0;
487         } else if (start > cache->start && start + len < cache->start +
488                    cache->size) {
489                 /*
490                  * |-------cache-----|
491                  *      |-wipe-|
492                  */
493                 u64 old_start = cache->start;
494                 u64 old_len = cache->size;
495                 u64 insert_start = start + len;
496                 u64 insert_len;
497
498                 cache->size = start - cache->start;
499                 /* Expand the leading half part if needed */
500                 if (ensure_size && cache->size < min_stripe_size) {
501                         ret = _expand_extent_cache(tree, cache,
502                                         min_stripe_size, 1);
503                         if (ret < 0)
504                                 return ret;
505                 }
506
507                 /* And insert the new one */
508                 insert_len = old_start + old_len - start - len;
509                 ret = add_merge_cache_extent(tree, insert_start, insert_len);
510                 if (ret < 0)
511                         return ret;
512
513                 /* Expand the last half part if needed */
514                 if (ensure_size && insert_len < min_stripe_size) {
515                         cache = lookup_cache_extent(tree, insert_start,
516                                                     insert_len);
517                         if (!cache || cache->start != insert_start ||
518                             cache->size != insert_len)
519                                 return -ENOENT;
520                         ret = _expand_extent_cache(tree, cache,
521                                         min_stripe_size, 0);
522                 }
523
524                 return ret;
525         }
526         /*
527          * |----cache-----|
528          *              |--wipe-|
529          * Wipe len should be small enough and no need to expand the
530          * remaining extent
531          */
532         cache->size = start - cache->start;
533         BUG_ON(ensure_size && cache->size < min_stripe_size);
534         return 0;
535 }
536
537 /*
538  * Remove reserved ranges from given cache_tree
539  *
540  * It will remove the following ranges
541  * 1) 0~1M
542  * 2) 2nd superblock, +64K (make sure chunks are 64K aligned)
543  * 3) 3rd superblock, +64K
544  *
545  * @min_stripe must be given for safety check
546  * and if @ensure_size is given, it will ensure affected cache_extent will be
547  * larger than min_stripe_size
548  */
549 static int wipe_reserved_ranges(struct cache_tree *tree, u64 min_stripe_size,
550                                 int ensure_size)
551 {
552         int ret;
553
554         ret = wipe_one_reserved_range(tree, 0, 1024 * 1024, min_stripe_size,
555                                       ensure_size);
556         if (ret < 0)
557                 return ret;
558         ret = wipe_one_reserved_range(tree, btrfs_sb_offset(1),
559                         BTRFS_STRIPE_LEN, min_stripe_size, ensure_size);
560         if (ret < 0)
561                 return ret;
562         ret = wipe_one_reserved_range(tree, btrfs_sb_offset(2),
563                         BTRFS_STRIPE_LEN, min_stripe_size, ensure_size);
564         return ret;
565 }
566
567 static int calculate_available_space(struct btrfs_convert_context *cctx)
568 {
569         struct cache_tree *used = &cctx->used;
570         struct cache_tree *data_chunks = &cctx->data_chunks;
571         struct cache_tree *free = &cctx->free;
572         struct cache_extent *cache;
573         u64 cur_off = 0;
574         /*
575          * Twice the minimal chunk size, to allow later wipe_reserved_ranges()
576          * works without need to consider overlap
577          */
578         u64 min_stripe_size = 2 * 16 * 1024 * 1024;
579         int ret;
580
581         /* Calculate data_chunks */
582         for (cache = first_cache_extent(used); cache;
583              cache = next_cache_extent(cache)) {
584                 u64 cur_len;
585
586                 if (cache->start + cache->size < cur_off)
587                         continue;
588                 if (cache->start > cur_off + min_stripe_size)
589                         cur_off = cache->start;
590                 cur_len = max(cache->start + cache->size - cur_off,
591                               min_stripe_size);
592                 ret = add_merge_cache_extent(data_chunks, cur_off, cur_len);
593                 if (ret < 0)
594                         goto out;
595                 cur_off += cur_len;
596         }
597         /*
598          * remove reserved ranges, so we won't ever bother relocating an old
599          * filesystem extent to other place.
600          */
601         ret = wipe_reserved_ranges(data_chunks, min_stripe_size, 1);
602         if (ret < 0)
603                 goto out;
604
605         cur_off = 0;
606         /*
607          * Calculate free space
608          * Always round up the start bytenr, to avoid metadata extent corss
609          * stripe boundary, as later mkfs_convert() won't have all the extent
610          * allocation check
611          */
612         for (cache = first_cache_extent(data_chunks); cache;
613              cache = next_cache_extent(cache)) {
614                 if (cache->start < cur_off)
615                         continue;
616                 if (cache->start > cur_off) {
617                         u64 insert_start;
618                         u64 len;
619
620                         len = cache->start - round_up(cur_off,
621                                                       BTRFS_STRIPE_LEN);
622                         insert_start = round_up(cur_off, BTRFS_STRIPE_LEN);
623
624                         ret = add_merge_cache_extent(free, insert_start, len);
625                         if (ret < 0)
626                                 goto out;
627                 }
628                 cur_off = cache->start + cache->size;
629         }
630         /* Don't forget the last range */
631         if (cctx->total_bytes > cur_off) {
632                 u64 len = cctx->total_bytes - cur_off;
633                 u64 insert_start;
634
635                 insert_start = round_up(cur_off, BTRFS_STRIPE_LEN);
636
637                 ret = add_merge_cache_extent(free, insert_start, len);
638                 if (ret < 0)
639                         goto out;
640         }
641
642         /* Remove reserved bytes */
643         ret = wipe_reserved_ranges(free, min_stripe_size, 0);
644 out:
645         return ret;
646 }
647
648 /*
649  * Read used space, and since we have the used space,
650  * calcuate data_chunks and free for later mkfs
651  */
652 static int convert_read_used_space(struct btrfs_convert_context *cctx)
653 {
654         int ret;
655
656         ret = cctx->convert_ops->read_used_space(cctx);
657         if (ret)
658                 return ret;
659
660         ret = calculate_available_space(cctx);
661         return ret;
662 }
663
664 /*
665  * Create the fs image file of old filesystem.
666  *
667  * This is completely fs independent as we have cctx->used, only
668  * need to create file extents pointing to all the positions.
669  */
670 static int create_image(struct btrfs_root *root,
671                            struct btrfs_mkfs_config *cfg,
672                            struct btrfs_convert_context *cctx, int fd,
673                            u64 size, char *name, int datacsum)
674 {
675         struct btrfs_inode_item buf;
676         struct btrfs_trans_handle *trans;
677         struct btrfs_path path;
678         struct btrfs_key key;
679         struct cache_extent *cache;
680         struct cache_tree used_tmp;
681         u64 cur;
682         u64 ino;
683         u64 flags = BTRFS_INODE_READONLY;
684         int ret;
685
686         if (!datacsum)
687                 flags |= BTRFS_INODE_NODATASUM;
688
689         trans = btrfs_start_transaction(root, 1);
690         if (!trans)
691                 return -ENOMEM;
692
693         cache_tree_init(&used_tmp);
694         btrfs_init_path(&path);
695
696         ret = btrfs_find_free_objectid(trans, root, BTRFS_FIRST_FREE_OBJECTID,
697                                        &ino);
698         if (ret < 0)
699                 goto out;
700         ret = btrfs_new_inode(trans, root, ino, 0400 | S_IFREG);
701         if (ret < 0)
702                 goto out;
703         ret = btrfs_change_inode_flags(trans, root, ino, flags);
704         if (ret < 0)
705                 goto out;
706         ret = btrfs_add_link(trans, root, ino, BTRFS_FIRST_FREE_OBJECTID, name,
707                              strlen(name), BTRFS_FT_REG_FILE, NULL, 1);
708         if (ret < 0)
709                 goto out;
710
711         key.objectid = ino;
712         key.type = BTRFS_INODE_ITEM_KEY;
713         key.offset = 0;
714
715         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
716         if (ret) {
717                 ret = (ret > 0 ? -ENOENT : ret);
718                 goto out;
719         }
720         read_extent_buffer(path.nodes[0], &buf,
721                         btrfs_item_ptr_offset(path.nodes[0], path.slots[0]),
722                         sizeof(buf));
723         btrfs_release_path(&path);
724
725         /*
726          * Create a new used space cache, which doesn't contain the reserved
727          * range
728          */
729         for (cache = first_cache_extent(&cctx->used); cache;
730              cache = next_cache_extent(cache)) {
731                 ret = add_cache_extent(&used_tmp, cache->start, cache->size);
732                 if (ret < 0)
733                         goto out;
734         }
735         ret = wipe_reserved_ranges(&used_tmp, 0, 0);
736         if (ret < 0)
737                 goto out;
738
739         /*
740          * Start from 1M, as 0~1M is reserved, and create_image_file_range()
741          * can't handle bytenr 0(will consider it as a hole)
742          */
743         cur = 1024 * 1024;
744         while (cur < size) {
745                 u64 len = size - cur;
746
747                 ret = create_image_file_range(trans, root, &used_tmp,
748                                                 &buf, ino, cur, &len, datacsum);
749                 if (ret < 0)
750                         goto out;
751                 cur += len;
752         }
753         /* Handle the reserved ranges */
754         ret = migrate_reserved_ranges(trans, root, &cctx->used, &buf, fd, ino,
755                                       cfg->num_bytes, datacsum);
756
757
758         key.objectid = ino;
759         key.type = BTRFS_INODE_ITEM_KEY;
760         key.offset = 0;
761         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
762         if (ret) {
763                 ret = (ret > 0 ? -ENOENT : ret);
764                 goto out;
765         }
766         btrfs_set_stack_inode_size(&buf, cfg->num_bytes);
767         write_extent_buffer(path.nodes[0], &buf,
768                         btrfs_item_ptr_offset(path.nodes[0], path.slots[0]),
769                         sizeof(buf));
770 out:
771         free_extent_cache_tree(&used_tmp);
772         btrfs_release_path(&path);
773         btrfs_commit_transaction(trans, root);
774         return ret;
775 }
776
777 static struct btrfs_root* link_subvol(struct btrfs_root *root,
778                 const char *base, u64 root_objectid)
779 {
780         struct btrfs_trans_handle *trans;
781         struct btrfs_fs_info *fs_info = root->fs_info;
782         struct btrfs_root *tree_root = fs_info->tree_root;
783         struct btrfs_root *new_root = NULL;
784         struct btrfs_path path;
785         struct btrfs_inode_item *inode_item;
786         struct extent_buffer *leaf;
787         struct btrfs_key key;
788         u64 dirid = btrfs_root_dirid(&root->root_item);
789         u64 index = 2;
790         char buf[BTRFS_NAME_LEN + 1]; /* for snprintf null */
791         int len;
792         int i;
793         int ret;
794
795         len = strlen(base);
796         if (len == 0 || len > BTRFS_NAME_LEN)
797                 return NULL;
798
799         btrfs_init_path(&path);
800         key.objectid = dirid;
801         key.type = BTRFS_DIR_INDEX_KEY;
802         key.offset = (u64)-1;
803
804         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
805         if (ret <= 0) {
806                 error("search for DIR_INDEX dirid %llu failed: %d",
807                                 (unsigned long long)dirid, ret);
808                 goto fail;
809         }
810
811         if (path.slots[0] > 0) {
812                 path.slots[0]--;
813                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
814                 if (key.objectid == dirid && key.type == BTRFS_DIR_INDEX_KEY)
815                         index = key.offset + 1;
816         }
817         btrfs_release_path(&path);
818
819         trans = btrfs_start_transaction(root, 1);
820         if (!trans) {
821                 error("unable to start transaction");
822                 goto fail;
823         }
824
825         key.objectid = dirid;
826         key.offset = 0;
827         key.type =  BTRFS_INODE_ITEM_KEY;
828
829         ret = btrfs_lookup_inode(trans, root, &path, &key, 1);
830         if (ret) {
831                 error("search for INODE_ITEM %llu failed: %d",
832                                 (unsigned long long)dirid, ret);
833                 goto fail;
834         }
835         leaf = path.nodes[0];
836         inode_item = btrfs_item_ptr(leaf, path.slots[0],
837                                     struct btrfs_inode_item);
838
839         key.objectid = root_objectid;
840         key.offset = (u64)-1;
841         key.type = BTRFS_ROOT_ITEM_KEY;
842
843         memcpy(buf, base, len);
844         for (i = 0; i < 1024; i++) {
845                 ret = btrfs_insert_dir_item(trans, root, buf, len,
846                                             dirid, &key, BTRFS_FT_DIR, index);
847                 if (ret != -EEXIST)
848                         break;
849                 len = snprintf(buf, ARRAY_SIZE(buf), "%s%d", base, i);
850                 if (len < 1 || len > BTRFS_NAME_LEN) {
851                         ret = -EINVAL;
852                         break;
853                 }
854         }
855         if (ret)
856                 goto fail;
857
858         btrfs_set_inode_size(leaf, inode_item, len * 2 +
859                              btrfs_inode_size(leaf, inode_item));
860         btrfs_mark_buffer_dirty(leaf);
861         btrfs_release_path(&path);
862
863         /* add the backref first */
864         ret = btrfs_add_root_ref(trans, tree_root, root_objectid,
865                                  BTRFS_ROOT_BACKREF_KEY,
866                                  root->root_key.objectid,
867                                  dirid, index, buf, len);
868         if (ret) {
869                 error("unable to add root backref for %llu: %d",
870                                 root->root_key.objectid, ret);
871                 goto fail;
872         }
873
874         /* now add the forward ref */
875         ret = btrfs_add_root_ref(trans, tree_root, root->root_key.objectid,
876                                  BTRFS_ROOT_REF_KEY, root_objectid,
877                                  dirid, index, buf, len);
878         if (ret) {
879                 error("unable to add root ref for %llu: %d",
880                                 root->root_key.objectid, ret);
881                 goto fail;
882         }
883
884         ret = btrfs_commit_transaction(trans, root);
885         if (ret) {
886                 error("transaction commit failed: %d", ret);
887                 goto fail;
888         }
889
890         new_root = btrfs_read_fs_root(fs_info, &key);
891         if (IS_ERR(new_root)) {
892                 error("unable to fs read root: %lu", PTR_ERR(new_root));
893                 new_root = NULL;
894         }
895 fail:
896         btrfs_init_path(&path);
897         return new_root;
898 }
899
900 static int create_subvol(struct btrfs_trans_handle *trans,
901                          struct btrfs_root *root, u64 root_objectid)
902 {
903         struct extent_buffer *tmp;
904         struct btrfs_root *new_root;
905         struct btrfs_key key;
906         struct btrfs_root_item root_item;
907         int ret;
908
909         ret = btrfs_copy_root(trans, root, root->node, &tmp,
910                               root_objectid);
911         if (ret)
912                 return ret;
913
914         memcpy(&root_item, &root->root_item, sizeof(root_item));
915         btrfs_set_root_bytenr(&root_item, tmp->start);
916         btrfs_set_root_level(&root_item, btrfs_header_level(tmp));
917         btrfs_set_root_generation(&root_item, trans->transid);
918         free_extent_buffer(tmp);
919
920         key.objectid = root_objectid;
921         key.type = BTRFS_ROOT_ITEM_KEY;
922         key.offset = trans->transid;
923         ret = btrfs_insert_root(trans, root->fs_info->tree_root,
924                                 &key, &root_item);
925
926         key.offset = (u64)-1;
927         new_root = btrfs_read_fs_root(root->fs_info, &key);
928         if (!new_root || IS_ERR(new_root)) {
929                 error("unable to fs read root: %lu", PTR_ERR(new_root));
930                 return PTR_ERR(new_root);
931         }
932
933         ret = btrfs_make_root_dir(trans, new_root, BTRFS_FIRST_FREE_OBJECTID);
934
935         return ret;
936 }
937
938 /*
939  * New make_btrfs() has handle system and meta chunks quite well.
940  * So only need to add remaining data chunks.
941  */
942 static int make_convert_data_block_groups(struct btrfs_trans_handle *trans,
943                                           struct btrfs_fs_info *fs_info,
944                                           struct btrfs_mkfs_config *cfg,
945                                           struct btrfs_convert_context *cctx)
946 {
947         struct btrfs_root *extent_root = fs_info->extent_root;
948         struct cache_tree *data_chunks = &cctx->data_chunks;
949         struct cache_extent *cache;
950         u64 max_chunk_size;
951         int ret = 0;
952
953         /*
954          * Don't create data chunk over 10% of the convert device
955          * And for single chunk, don't create chunk larger than 1G.
956          */
957         max_chunk_size = cfg->num_bytes / 10;
958         max_chunk_size = min((u64)(1024 * 1024 * 1024), max_chunk_size);
959         max_chunk_size = round_down(max_chunk_size, extent_root->sectorsize);
960
961         for (cache = first_cache_extent(data_chunks); cache;
962              cache = next_cache_extent(cache)) {
963                 u64 cur = cache->start;
964
965                 while (cur < cache->start + cache->size) {
966                         u64 len;
967                         u64 cur_backup = cur;
968
969                         len = min(max_chunk_size,
970                                   cache->start + cache->size - cur);
971                         ret = btrfs_alloc_data_chunk(trans, extent_root,
972                                         &cur_backup, len,
973                                         BTRFS_BLOCK_GROUP_DATA, 1);
974                         if (ret < 0)
975                                 break;
976                         ret = btrfs_make_block_group(trans, extent_root, 0,
977                                         BTRFS_BLOCK_GROUP_DATA,
978                                         BTRFS_FIRST_CHUNK_TREE_OBJECTID,
979                                         cur, len);
980                         if (ret < 0)
981                                 break;
982                         cur += len;
983                 }
984         }
985         return ret;
986 }
987
988 /*
989  * Init the temp btrfs to a operational status.
990  *
991  * It will fix the extent usage accounting(XXX: Do we really need?) and
992  * insert needed data chunks, to ensure all old fs data extents are covered
993  * by DATA chunks, preventing wrong chunks are allocated.
994  *
995  * And also create convert image subvolume and relocation tree.
996  * (XXX: Not need again?)
997  * But the convert image subvolume is *NOT* linked to fs tree yet.
998  */
999 static int init_btrfs(struct btrfs_mkfs_config *cfg, struct btrfs_root *root,
1000                          struct btrfs_convert_context *cctx, int datacsum,
1001                          int packing, int noxattr)
1002 {
1003         struct btrfs_key location;
1004         struct btrfs_trans_handle *trans;
1005         struct btrfs_fs_info *fs_info = root->fs_info;
1006         int ret;
1007
1008         /*
1009          * Don't alloc any metadata/system chunk, as we don't want
1010          * any meta/sys chunk allcated before all data chunks are inserted.
1011          * Or we screw up the chunk layout just like the old implement.
1012          */
1013         fs_info->avoid_sys_chunk_alloc = 1;
1014         fs_info->avoid_meta_chunk_alloc = 1;
1015         trans = btrfs_start_transaction(root, 1);
1016         if (!trans) {
1017                 error("unable to start transaction");
1018                 ret = -EINVAL;
1019                 goto err;
1020         }
1021         ret = btrfs_fix_block_accounting(trans, root);
1022         if (ret)
1023                 goto err;
1024         ret = make_convert_data_block_groups(trans, fs_info, cfg, cctx);
1025         if (ret)
1026                 goto err;
1027         ret = btrfs_make_root_dir(trans, fs_info->tree_root,
1028                                   BTRFS_ROOT_TREE_DIR_OBJECTID);
1029         if (ret)
1030                 goto err;
1031         memcpy(&location, &root->root_key, sizeof(location));
1032         location.offset = (u64)-1;
1033         ret = btrfs_insert_dir_item(trans, fs_info->tree_root, "default", 7,
1034                                 btrfs_super_root_dir(fs_info->super_copy),
1035                                 &location, BTRFS_FT_DIR, 0);
1036         if (ret)
1037                 goto err;
1038         ret = btrfs_insert_inode_ref(trans, fs_info->tree_root, "default", 7,
1039                                 location.objectid,
1040                                 btrfs_super_root_dir(fs_info->super_copy), 0);
1041         if (ret)
1042                 goto err;
1043         btrfs_set_root_dirid(&fs_info->fs_root->root_item,
1044                              BTRFS_FIRST_FREE_OBJECTID);
1045
1046         /* subvol for fs image file */
1047         ret = create_subvol(trans, root, CONV_IMAGE_SUBVOL_OBJECTID);
1048         if (ret < 0) {
1049                 error("failed to create subvolume image root: %d", ret);
1050                 goto err;
1051         }
1052         /* subvol for data relocation tree */
1053         ret = create_subvol(trans, root, BTRFS_DATA_RELOC_TREE_OBJECTID);
1054         if (ret < 0) {
1055                 error("failed to create DATA_RELOC root: %d", ret);
1056                 goto err;
1057         }
1058
1059         ret = btrfs_commit_transaction(trans, root);
1060         fs_info->avoid_sys_chunk_alloc = 0;
1061         fs_info->avoid_meta_chunk_alloc = 0;
1062 err:
1063         return ret;
1064 }
1065
1066 /*
1067  * Migrate super block to its default position and zero 0 ~ 16k
1068  */
1069 static int migrate_super_block(int fd, u64 old_bytenr)
1070 {
1071         int ret;
1072         struct extent_buffer *buf;
1073         struct btrfs_super_block *super;
1074         u32 len;
1075         u32 bytenr;
1076
1077         buf = malloc(sizeof(*buf) + BTRFS_SUPER_INFO_SIZE);
1078         if (!buf)
1079                 return -ENOMEM;
1080
1081         buf->len = BTRFS_SUPER_INFO_SIZE;
1082         ret = pread(fd, buf->data, BTRFS_SUPER_INFO_SIZE, old_bytenr);
1083         if (ret != BTRFS_SUPER_INFO_SIZE)
1084                 goto fail;
1085
1086         super = (struct btrfs_super_block *)buf->data;
1087         BUG_ON(btrfs_super_bytenr(super) != old_bytenr);
1088         btrfs_set_super_bytenr(super, BTRFS_SUPER_INFO_OFFSET);
1089
1090         csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
1091         ret = pwrite(fd, buf->data, BTRFS_SUPER_INFO_SIZE,
1092                 BTRFS_SUPER_INFO_OFFSET);
1093         if (ret != BTRFS_SUPER_INFO_SIZE)
1094                 goto fail;
1095
1096         ret = fsync(fd);
1097         if (ret)
1098                 goto fail;
1099
1100         memset(buf->data, 0, BTRFS_SUPER_INFO_SIZE);
1101         for (bytenr = 0; bytenr < BTRFS_SUPER_INFO_OFFSET; ) {
1102                 len = BTRFS_SUPER_INFO_OFFSET - bytenr;
1103                 if (len > BTRFS_SUPER_INFO_SIZE)
1104                         len = BTRFS_SUPER_INFO_SIZE;
1105                 ret = pwrite(fd, buf->data, len, bytenr);
1106                 if (ret != len) {
1107                         fprintf(stderr, "unable to zero fill device\n");
1108                         break;
1109                 }
1110                 bytenr += len;
1111         }
1112         ret = 0;
1113         fsync(fd);
1114 fail:
1115         free(buf);
1116         if (ret > 0)
1117                 ret = -1;
1118         return ret;
1119 }
1120
1121 static int prepare_system_chunk_sb(struct btrfs_super_block *super)
1122 {
1123         struct btrfs_chunk *chunk;
1124         struct btrfs_disk_key *key;
1125         u32 sectorsize = btrfs_super_sectorsize(super);
1126
1127         key = (struct btrfs_disk_key *)(super->sys_chunk_array);
1128         chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
1129                                        sizeof(struct btrfs_disk_key));
1130
1131         btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
1132         btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
1133         btrfs_set_disk_key_offset(key, 0);
1134
1135         btrfs_set_stack_chunk_length(chunk, btrfs_super_total_bytes(super));
1136         btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
1137         btrfs_set_stack_chunk_stripe_len(chunk, BTRFS_STRIPE_LEN);
1138         btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
1139         btrfs_set_stack_chunk_io_align(chunk, sectorsize);
1140         btrfs_set_stack_chunk_io_width(chunk, sectorsize);
1141         btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
1142         btrfs_set_stack_chunk_num_stripes(chunk, 1);
1143         btrfs_set_stack_chunk_sub_stripes(chunk, 0);
1144         chunk->stripe.devid = super->dev_item.devid;
1145         btrfs_set_stack_stripe_offset(&chunk->stripe, 0);
1146         memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
1147         btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
1148         return 0;
1149 }
1150
1151 static int convert_open_fs(const char *devname,
1152                            struct btrfs_convert_context *cctx)
1153 {
1154         int i;
1155
1156         for (i = 0; i < ARRAY_SIZE(convert_operations); i++) {
1157                 int ret = convert_operations[i]->open_fs(cctx, devname);
1158
1159                 if (ret == 0) {
1160                         cctx->convert_ops = convert_operations[i];
1161                         return ret;
1162                 }
1163         }
1164
1165         error("no file system found to convert");
1166         return -1;
1167 }
1168
1169 static int do_convert(const char *devname, int datacsum, int packing,
1170                 int noxattr, u32 nodesize, int copylabel, const char *fslabel,
1171                 int progress, u64 features)
1172 {
1173         int ret;
1174         int fd = -1;
1175         u32 blocksize;
1176         u64 total_bytes;
1177         struct btrfs_root *root;
1178         struct btrfs_root *image_root;
1179         struct btrfs_convert_context cctx;
1180         struct btrfs_key key;
1181         char subvol_name[SOURCE_FS_NAME_LEN + 8];
1182         struct task_ctx ctx;
1183         char features_buf[64];
1184         struct btrfs_mkfs_config mkfs_cfg;
1185
1186         init_convert_context(&cctx);
1187         ret = convert_open_fs(devname, &cctx);
1188         if (ret)
1189                 goto fail;
1190         ret = convert_check_state(&cctx);
1191         if (ret)
1192                 warning(
1193                 "source filesystem is not clean, running filesystem check is recommended");
1194         ret = convert_read_used_space(&cctx);
1195         if (ret)
1196                 goto fail;
1197
1198         blocksize = cctx.blocksize;
1199         total_bytes = (u64)blocksize * (u64)cctx.block_count;
1200         if (blocksize < 4096) {
1201                 error("block size is too small: %u < 4096", blocksize);
1202                 goto fail;
1203         }
1204         if (btrfs_check_nodesize(nodesize, blocksize, features))
1205                 goto fail;
1206         fd = open(devname, O_RDWR);
1207         if (fd < 0) {
1208                 error("unable to open %s: %s", devname, strerror(errno));
1209                 goto fail;
1210         }
1211         btrfs_parse_features_to_string(features_buf, features);
1212         if (features == BTRFS_MKFS_DEFAULT_FEATURES)
1213                 strcat(features_buf, " (default)");
1214
1215         printf("create btrfs filesystem:\n");
1216         printf("\tblocksize: %u\n", blocksize);
1217         printf("\tnodesize:  %u\n", nodesize);
1218         printf("\tfeatures:  %s\n", features_buf);
1219
1220         mkfs_cfg.label = cctx.volume_name;
1221         mkfs_cfg.num_bytes = total_bytes;
1222         mkfs_cfg.nodesize = nodesize;
1223         mkfs_cfg.sectorsize = blocksize;
1224         mkfs_cfg.stripesize = blocksize;
1225         mkfs_cfg.features = features;
1226         /* New convert need these space */
1227         memset(mkfs_cfg.chunk_uuid, 0, BTRFS_UUID_UNPARSED_SIZE);
1228         memset(mkfs_cfg.fs_uuid, 0, BTRFS_UUID_UNPARSED_SIZE);
1229
1230         ret = make_convert_btrfs(fd, &mkfs_cfg, &cctx);
1231         if (ret) {
1232                 error("unable to create initial ctree: %s", strerror(-ret));
1233                 goto fail;
1234         }
1235
1236         root = open_ctree_fd(fd, devname, mkfs_cfg.super_bytenr,
1237                              OPEN_CTREE_WRITES | OPEN_CTREE_FS_PARTIAL);
1238         if (!root) {
1239                 error("unable to open ctree");
1240                 goto fail;
1241         }
1242         ret = init_btrfs(&mkfs_cfg, root, &cctx, datacsum, packing, noxattr);
1243         if (ret) {
1244                 error("unable to setup the root tree: %d", ret);
1245                 goto fail;
1246         }
1247
1248         printf("creating %s image file\n", cctx.convert_ops->name);
1249         snprintf(subvol_name, sizeof(subvol_name), "%s_saved",
1250                         cctx.convert_ops->name);
1251         key.objectid = CONV_IMAGE_SUBVOL_OBJECTID;
1252         key.offset = (u64)-1;
1253         key.type = BTRFS_ROOT_ITEM_KEY;
1254         image_root = btrfs_read_fs_root(root->fs_info, &key);
1255         if (!image_root) {
1256                 error("unable to create image subvolume");
1257                 goto fail;
1258         }
1259         ret = create_image(image_root, &mkfs_cfg, &cctx, fd,
1260                               mkfs_cfg.num_bytes, "image", datacsum);
1261         if (ret) {
1262                 error("failed to create %s/image: %d", subvol_name, ret);
1263                 goto fail;
1264         }
1265
1266         printf("creating btrfs metadata");
1267         ctx.max_copy_inodes = (cctx.inodes_count - cctx.free_inodes_count);
1268         ctx.cur_copy_inodes = 0;
1269
1270         if (progress) {
1271                 ctx.info = task_init(print_copied_inodes, after_copied_inodes,
1272                                      &ctx);
1273                 task_start(ctx.info);
1274         }
1275         ret = copy_inodes(&cctx, root, datacsum, packing, noxattr, &ctx);
1276         if (ret) {
1277                 error("error during copy_inodes %d", ret);
1278                 goto fail;
1279         }
1280         if (progress) {
1281                 task_stop(ctx.info);
1282                 task_deinit(ctx.info);
1283         }
1284
1285         image_root = link_subvol(root, subvol_name, CONV_IMAGE_SUBVOL_OBJECTID);
1286         if (!image_root) {
1287                 error("unable to link subvolume %s", subvol_name);
1288                 goto fail;
1289         }
1290
1291         memset(root->fs_info->super_copy->label, 0, BTRFS_LABEL_SIZE);
1292         if (copylabel == 1) {
1293                 __strncpy_null(root->fs_info->super_copy->label,
1294                                 cctx.volume_name, BTRFS_LABEL_SIZE - 1);
1295                 printf("copy label '%s'\n", root->fs_info->super_copy->label);
1296         } else if (copylabel == -1) {
1297                 strcpy(root->fs_info->super_copy->label, fslabel);
1298                 printf("set label to '%s'\n", fslabel);
1299         }
1300
1301         ret = close_ctree(root);
1302         if (ret) {
1303                 error("close_ctree failed: %d", ret);
1304                 goto fail;
1305         }
1306         convert_close_fs(&cctx);
1307         clean_convert_context(&cctx);
1308
1309         /*
1310          * If this step succeed, we get a mountable btrfs. Otherwise
1311          * the source fs is left unchanged.
1312          */
1313         ret = migrate_super_block(fd, mkfs_cfg.super_bytenr);
1314         if (ret) {
1315                 error("unable to migrate super block: %d", ret);
1316                 goto fail;
1317         }
1318
1319         root = open_ctree_fd(fd, devname, 0,
1320                         OPEN_CTREE_WRITES | OPEN_CTREE_FS_PARTIAL);
1321         if (!root) {
1322                 error("unable to open ctree for finalization");
1323                 goto fail;
1324         }
1325         root->fs_info->finalize_on_close = 1;
1326         close_ctree(root);
1327         close(fd);
1328
1329         printf("conversion complete");
1330         return 0;
1331 fail:
1332         clean_convert_context(&cctx);
1333         if (fd != -1)
1334                 close(fd);
1335         warning(
1336 "an error occurred during conversion, filesystem is partially created but not finalized and not mountable");
1337         return -1;
1338 }
1339
1340 /*
1341  * Check if a non 1:1 mapped chunk can be rolled back.
1342  * For new convert, it's OK while for old convert it's not.
1343  */
1344 static int may_rollback_chunk(struct btrfs_fs_info *fs_info, u64 bytenr)
1345 {
1346         struct btrfs_block_group_cache *bg;
1347         struct btrfs_key key;
1348         struct btrfs_path path;
1349         struct btrfs_root *extent_root = fs_info->extent_root;
1350         u64 bg_start;
1351         u64 bg_end;
1352         int ret;
1353
1354         bg = btrfs_lookup_first_block_group(fs_info, bytenr);
1355         if (!bg)
1356                 return -ENOENT;
1357         bg_start = bg->key.objectid;
1358         bg_end = bg->key.objectid + bg->key.offset;
1359
1360         key.objectid = bg_end;
1361         key.type = BTRFS_METADATA_ITEM_KEY;
1362         key.offset = 0;
1363         btrfs_init_path(&path);
1364
1365         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
1366         if (ret < 0)
1367                 return ret;
1368
1369         while (1) {
1370                 struct btrfs_extent_item *ei;
1371
1372                 ret = btrfs_previous_extent_item(extent_root, &path, bg_start);
1373                 if (ret > 0) {
1374                         ret = 0;
1375                         break;
1376                 }
1377                 if (ret < 0)
1378                         break;
1379
1380                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
1381                 if (key.type == BTRFS_METADATA_ITEM_KEY)
1382                         continue;
1383                 /* Now it's EXTENT_ITEM_KEY only */
1384                 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
1385                                     struct btrfs_extent_item);
1386                 /*
1387                  * Found data extent, means this is old convert must follow 1:1
1388                  * mapping.
1389                  */
1390                 if (btrfs_extent_flags(path.nodes[0], ei)
1391                                 & BTRFS_EXTENT_FLAG_DATA) {
1392                         ret = -EINVAL;
1393                         break;
1394                 }
1395         }
1396         btrfs_release_path(&path);
1397         return ret;
1398 }
1399
1400 static int may_rollback(struct btrfs_root *root)
1401 {
1402         struct btrfs_fs_info *info = root->fs_info;
1403         struct btrfs_multi_bio *multi = NULL;
1404         u64 bytenr;
1405         u64 length;
1406         u64 physical;
1407         u64 total_bytes;
1408         int num_stripes;
1409         int ret;
1410
1411         if (btrfs_super_num_devices(info->super_copy) != 1)
1412                 goto fail;
1413
1414         bytenr = BTRFS_SUPER_INFO_OFFSET;
1415         total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
1416
1417         while (1) {
1418                 ret = btrfs_map_block(&info->mapping_tree, WRITE, bytenr,
1419                                       &length, &multi, 0, NULL);
1420                 if (ret) {
1421                         if (ret == -ENOENT) {
1422                                 /* removed block group at the tail */
1423                                 if (length == (u64)-1)
1424                                         break;
1425
1426                                 /* removed block group in the middle */
1427                                 goto next;
1428                         }
1429                         goto fail;
1430                 }
1431
1432                 num_stripes = multi->num_stripes;
1433                 physical = multi->stripes[0].physical;
1434                 free(multi);
1435
1436                 if (num_stripes != 1) {
1437                         error("num stripes for bytenr %llu is not 1", bytenr);
1438                         goto fail;
1439                 }
1440
1441                 /*
1442                  * Extra check for new convert, as metadata chunk from new
1443                  * convert is much more free than old convert, it doesn't need
1444                  * to do 1:1 mapping.
1445                  */
1446                 if (physical != bytenr) {
1447                         /*
1448                          * Check if it's a metadata chunk and has only metadata
1449                          * extent.
1450                          */
1451                         ret = may_rollback_chunk(info, bytenr);
1452                         if (ret < 0)
1453                                 goto fail;
1454                 }
1455 next:
1456                 bytenr += length;
1457                 if (bytenr >= total_bytes)
1458                         break;
1459         }
1460         return 0;
1461 fail:
1462         return -1;
1463 }
1464
1465 static int do_rollback(const char *devname)
1466 {
1467         int fd = -1;
1468         int ret;
1469         int i;
1470         struct btrfs_root *root;
1471         struct btrfs_root *image_root;
1472         struct btrfs_root *chunk_root;
1473         struct btrfs_dir_item *dir;
1474         struct btrfs_inode_item *inode;
1475         struct btrfs_file_extent_item *fi;
1476         struct btrfs_trans_handle *trans;
1477         struct extent_buffer *leaf;
1478         struct btrfs_block_group_cache *cache1;
1479         struct btrfs_block_group_cache *cache2;
1480         struct btrfs_key key;
1481         struct btrfs_path path;
1482         struct extent_io_tree io_tree;
1483         char *buf = NULL;
1484         char *name;
1485         u64 bytenr;
1486         u64 num_bytes;
1487         u64 root_dir;
1488         u64 objectid;
1489         u64 offset;
1490         u64 start;
1491         u64 end;
1492         u64 sb_bytenr;
1493         u64 first_free;
1494         u64 total_bytes;
1495         u32 sectorsize;
1496
1497         extent_io_tree_init(&io_tree);
1498
1499         fd = open(devname, O_RDWR);
1500         if (fd < 0) {
1501                 error("unable to open %s: %s", devname, strerror(errno));
1502                 goto fail;
1503         }
1504         root = open_ctree_fd(fd, devname, 0, OPEN_CTREE_WRITES);
1505         if (!root) {
1506                 error("unable to open ctree");
1507                 goto fail;
1508         }
1509         ret = may_rollback(root);
1510         if (ret < 0) {
1511                 error("unable to do rollback: %d", ret);
1512                 goto fail;
1513         }
1514
1515         sectorsize = root->sectorsize;
1516         buf = malloc(sectorsize);
1517         if (!buf) {
1518                 error("unable to allocate memory");
1519                 goto fail;
1520         }
1521
1522         btrfs_init_path(&path);
1523
1524         key.objectid = CONV_IMAGE_SUBVOL_OBJECTID;
1525         key.type = BTRFS_ROOT_BACKREF_KEY;
1526         key.offset = BTRFS_FS_TREE_OBJECTID;
1527         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path, 0,
1528                                 0);
1529         btrfs_release_path(&path);
1530         if (ret > 0) {
1531                 error("unable to convert ext2 image subvolume, is it deleted?");
1532                 goto fail;
1533         } else if (ret < 0) {
1534                 error("unable to open ext2_saved, id %llu: %s",
1535                         (unsigned long long)key.objectid, strerror(-ret));
1536                 goto fail;
1537         }
1538
1539         key.objectid = CONV_IMAGE_SUBVOL_OBJECTID;
1540         key.type = BTRFS_ROOT_ITEM_KEY;
1541         key.offset = (u64)-1;
1542         image_root = btrfs_read_fs_root(root->fs_info, &key);
1543         if (!image_root || IS_ERR(image_root)) {
1544                 error("unable to open subvolume %llu: %ld",
1545                         (unsigned long long)key.objectid, PTR_ERR(image_root));
1546                 goto fail;
1547         }
1548
1549         name = "image";
1550         root_dir = btrfs_root_dirid(&root->root_item);
1551         dir = btrfs_lookup_dir_item(NULL, image_root, &path,
1552                                    root_dir, name, strlen(name), 0);
1553         if (!dir || IS_ERR(dir)) {
1554                 error("unable to find file %s: %ld", name, PTR_ERR(dir));
1555                 goto fail;
1556         }
1557         leaf = path.nodes[0];
1558         btrfs_dir_item_key_to_cpu(leaf, dir, &key);
1559         btrfs_release_path(&path);
1560
1561         objectid = key.objectid;
1562
1563         ret = btrfs_lookup_inode(NULL, image_root, &path, &key, 0);
1564         if (ret) {
1565                 error("unable to find inode item: %d", ret);
1566                 goto fail;
1567         }
1568         leaf = path.nodes[0];
1569         inode = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_inode_item);
1570         total_bytes = btrfs_inode_size(leaf, inode);
1571         btrfs_release_path(&path);
1572
1573         key.objectid = objectid;
1574         key.offset = 0;
1575         key.type = BTRFS_EXTENT_DATA_KEY;
1576         ret = btrfs_search_slot(NULL, image_root, &key, &path, 0, 0);
1577         if (ret != 0) {
1578                 error("unable to find first file extent: %d", ret);
1579                 btrfs_release_path(&path);
1580                 goto fail;
1581         }
1582
1583         /* build mapping tree for the relocated blocks */
1584         for (offset = 0; offset < total_bytes; ) {
1585                 leaf = path.nodes[0];
1586                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1587                         ret = btrfs_next_leaf(root, &path);
1588                         if (ret != 0)
1589                                 break;  
1590                         continue;
1591                 }
1592
1593                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1594                 if (key.objectid != objectid || key.offset != offset ||
1595                     key.type != BTRFS_EXTENT_DATA_KEY)
1596                         break;
1597
1598                 fi = btrfs_item_ptr(leaf, path.slots[0],
1599                                     struct btrfs_file_extent_item);
1600                 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
1601                         break;
1602                 if (btrfs_file_extent_compression(leaf, fi) ||
1603                     btrfs_file_extent_encryption(leaf, fi) ||
1604                     btrfs_file_extent_other_encoding(leaf, fi))
1605                         break;
1606
1607                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1608                 /* skip holes and direct mapped extents */
1609                 if (bytenr == 0 || bytenr == offset)
1610                         goto next_extent;
1611
1612                 bytenr += btrfs_file_extent_offset(leaf, fi);
1613                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
1614
1615                 cache1 = btrfs_lookup_block_group(root->fs_info, offset);
1616                 cache2 = btrfs_lookup_block_group(root->fs_info,
1617                                                   offset + num_bytes - 1);
1618                 /*
1619                  * Here we must take consideration of old and new convert
1620                  * behavior.
1621                  * For old convert case, sign, there is no consist chunk type
1622                  * that will cover the extent. META/DATA/SYS are all possible.
1623                  * Just ensure relocate one is in SYS chunk.
1624                  * For new convert case, they are all covered by DATA chunk.
1625                  *
1626                  * So, there is not valid chunk type check for it now.
1627                  */
1628                 if (cache1 != cache2)
1629                         break;
1630
1631                 set_extent_bits(&io_tree, offset, offset + num_bytes - 1,
1632                                 EXTENT_LOCKED, GFP_NOFS);
1633                 set_state_private(&io_tree, offset, bytenr);
1634 next_extent:
1635                 offset += btrfs_file_extent_num_bytes(leaf, fi);
1636                 path.slots[0]++;
1637         }
1638         btrfs_release_path(&path);
1639
1640         if (offset < total_bytes) {
1641                 error("unable to build extent mapping (offset %llu, total_bytes %llu)",
1642                                 (unsigned long long)offset,
1643                                 (unsigned long long)total_bytes);
1644                 error("converted filesystem after balance is unable to rollback");
1645                 goto fail;
1646         }
1647
1648         first_free = BTRFS_SUPER_INFO_OFFSET + 2 * sectorsize - 1;
1649         first_free &= ~((u64)sectorsize - 1);
1650         /* backup for extent #0 should exist */
1651         if(!test_range_bit(&io_tree, 0, first_free - 1, EXTENT_LOCKED, 1)) {
1652                 error("no backup for the first extent");
1653                 goto fail;
1654         }
1655         /* force no allocation from system block group */
1656         root->fs_info->system_allocs = -1;
1657         trans = btrfs_start_transaction(root, 1);
1658         if (!trans) {
1659                 error("unable to start transaction");
1660                 goto fail;
1661         }
1662         /*
1663          * recow the whole chunk tree, this will remove all chunk tree blocks
1664          * from system block group
1665          */
1666         chunk_root = root->fs_info->chunk_root;
1667         memset(&key, 0, sizeof(key));
1668         while (1) {
1669                 ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 1);
1670                 if (ret < 0)
1671                         break;
1672
1673                 ret = btrfs_next_leaf(chunk_root, &path);
1674                 if (ret)
1675                         break;
1676
1677                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
1678                 btrfs_release_path(&path);
1679         }
1680         btrfs_release_path(&path);
1681
1682         offset = 0;
1683         num_bytes = 0;
1684         while(1) {
1685                 cache1 = btrfs_lookup_block_group(root->fs_info, offset);
1686                 if (!cache1)
1687                         break;
1688
1689                 if (cache1->flags & BTRFS_BLOCK_GROUP_SYSTEM)
1690                         num_bytes += btrfs_block_group_used(&cache1->item);
1691
1692                 offset = cache1->key.objectid + cache1->key.offset;
1693         }
1694         /* only extent #0 left in system block group? */
1695         if (num_bytes > first_free) {
1696                 error(
1697         "unable to empty system block group (num_bytes %llu, first_free %llu",
1698                                 (unsigned long long)num_bytes,
1699                                 (unsigned long long)first_free);
1700                 goto fail;
1701         }
1702         /* create a system chunk that maps the whole device */
1703         ret = prepare_system_chunk_sb(root->fs_info->super_copy);
1704         if (ret) {
1705                 error("unable to update system chunk: %d", ret);
1706                 goto fail;
1707         }
1708
1709         ret = btrfs_commit_transaction(trans, root);
1710         if (ret) {
1711                 error("transaction commit failed: %d", ret);
1712                 goto fail;
1713         }
1714
1715         ret = close_ctree(root);
1716         if (ret) {
1717                 error("close_ctree failed: %d", ret);
1718                 goto fail;
1719         }
1720
1721         /* zero btrfs super block mirrors */
1722         memset(buf, 0, sectorsize);
1723         for (i = 1 ; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1724                 bytenr = btrfs_sb_offset(i);
1725                 if (bytenr >= total_bytes)
1726                         break;
1727                 ret = pwrite(fd, buf, sectorsize, bytenr);
1728                 if (ret != sectorsize) {
1729                         error("zeroing superblock mirror %d failed: %d",
1730                                         i, ret);
1731                         goto fail;
1732                 }
1733         }
1734
1735         sb_bytenr = (u64)-1;
1736         /* copy all relocated blocks back */
1737         while(1) {
1738                 ret = find_first_extent_bit(&io_tree, 0, &start, &end,
1739                                             EXTENT_LOCKED);
1740                 if (ret)
1741                         break;
1742
1743                 ret = get_state_private(&io_tree, start, &bytenr);
1744                 BUG_ON(ret);
1745
1746                 clear_extent_bits(&io_tree, start, end, EXTENT_LOCKED,
1747                                   GFP_NOFS);
1748
1749                 while (start <= end) {
1750                         if (start == BTRFS_SUPER_INFO_OFFSET) {
1751                                 sb_bytenr = bytenr;
1752                                 goto next_sector;
1753                         }
1754                         ret = pread(fd, buf, sectorsize, bytenr);
1755                         if (ret < 0) {
1756                                 error("reading superblock at %llu failed: %d",
1757                                                 (unsigned long long)bytenr, ret);
1758                                 goto fail;
1759                         }
1760                         BUG_ON(ret != sectorsize);
1761                         ret = pwrite(fd, buf, sectorsize, start);
1762                         if (ret < 0) {
1763                                 error("writing superblock at %llu failed: %d",
1764                                                 (unsigned long long)start, ret);
1765                                 goto fail;
1766                         }
1767                         BUG_ON(ret != sectorsize);
1768 next_sector:
1769                         start += sectorsize;
1770                         bytenr += sectorsize;
1771                 }
1772         }
1773
1774         ret = fsync(fd);
1775         if (ret < 0) {
1776                 error("fsync failed: %s", strerror(errno));
1777                 goto fail;
1778         }
1779         /*
1780          * finally, overwrite btrfs super block.
1781          */
1782         ret = pread(fd, buf, sectorsize, sb_bytenr);
1783         if (ret < 0) {
1784                 error("reading primary superblock failed: %s",
1785                                 strerror(errno));
1786                 goto fail;
1787         }
1788         BUG_ON(ret != sectorsize);
1789         ret = pwrite(fd, buf, sectorsize, BTRFS_SUPER_INFO_OFFSET);
1790         if (ret < 0) {
1791                 error("writing primary superblock failed: %s",
1792                                 strerror(errno));
1793                 goto fail;
1794         }
1795         BUG_ON(ret != sectorsize);
1796         ret = fsync(fd);
1797         if (ret < 0) {
1798                 error("fsync failed: %s", strerror(errno));
1799                 goto fail;
1800         }
1801
1802         close(fd);
1803         free(buf);
1804         extent_io_tree_cleanup(&io_tree);
1805         printf("rollback complete\n");
1806         return 0;
1807
1808 fail:
1809         if (fd != -1)
1810                 close(fd);
1811         free(buf);
1812         error("rollback aborted");
1813         return -1;
1814 }
1815
1816 static void print_usage(void)
1817 {
1818         printf("usage: btrfs-convert [options] device\n");
1819         printf("options:\n");
1820         printf("\t-d|--no-datasum        disable data checksum, sets NODATASUM\n");
1821         printf("\t-i|--no-xattr          ignore xattrs and ACLs\n");
1822         printf("\t-n|--no-inline         disable inlining of small files to metadata\n");
1823         printf("\t-N|--nodesize SIZE     set filesystem metadata nodesize\n");
1824         printf("\t-r|--rollback          roll back to the original filesystem\n");
1825         printf("\t-l|--label LABEL       set filesystem label\n");
1826         printf("\t-L|--copy-label        use label from converted filesystem\n");
1827         printf("\t-p|--progress          show converting progress (default)\n");
1828         printf("\t-O|--features LIST     comma separated list of filesystem features\n");
1829         printf("\t--no-progress          show only overview, not the detailed progress\n");
1830         printf("\n");
1831         printf("Supported filesystems:\n");
1832         printf("\text2/3/4: %s\n", BTRFSCONVERT_EXT2 ? "yes" : "no");
1833 }
1834
1835 int main(int argc, char *argv[])
1836 {
1837         int ret;
1838         int packing = 1;
1839         int noxattr = 0;
1840         int datacsum = 1;
1841         u32 nodesize = max_t(u32, sysconf(_SC_PAGESIZE),
1842                         BTRFS_MKFS_DEFAULT_NODE_SIZE);
1843         int rollback = 0;
1844         int copylabel = 0;
1845         int usage_error = 0;
1846         int progress = 1;
1847         char *file;
1848         char fslabel[BTRFS_LABEL_SIZE];
1849         u64 features = BTRFS_MKFS_DEFAULT_FEATURES;
1850
1851         while(1) {
1852                 enum { GETOPT_VAL_NO_PROGRESS = 256 };
1853                 static const struct option long_options[] = {
1854                         { "no-progress", no_argument, NULL,
1855                                 GETOPT_VAL_NO_PROGRESS },
1856                         { "no-datasum", no_argument, NULL, 'd' },
1857                         { "no-inline", no_argument, NULL, 'n' },
1858                         { "no-xattr", no_argument, NULL, 'i' },
1859                         { "rollback", no_argument, NULL, 'r' },
1860                         { "features", required_argument, NULL, 'O' },
1861                         { "progress", no_argument, NULL, 'p' },
1862                         { "label", required_argument, NULL, 'l' },
1863                         { "copy-label", no_argument, NULL, 'L' },
1864                         { "nodesize", required_argument, NULL, 'N' },
1865                         { "help", no_argument, NULL, GETOPT_VAL_HELP},
1866                         { NULL, 0, NULL, 0 }
1867                 };
1868                 int c = getopt_long(argc, argv, "dinN:rl:LpO:", long_options, NULL);
1869
1870                 if (c < 0)
1871                         break;
1872                 switch(c) {
1873                         case 'd':
1874                                 datacsum = 0;
1875                                 break;
1876                         case 'i':
1877                                 noxattr = 1;
1878                                 break;
1879                         case 'n':
1880                                 packing = 0;
1881                                 break;
1882                         case 'N':
1883                                 nodesize = parse_size(optarg);
1884                                 break;
1885                         case 'r':
1886                                 rollback = 1;
1887                                 break;
1888                         case 'l':
1889                                 copylabel = -1;
1890                                 if (strlen(optarg) >= BTRFS_LABEL_SIZE) {
1891                                         warning(
1892                                         "label too long, trimmed to %d bytes",
1893                                                 BTRFS_LABEL_SIZE - 1);
1894                                 }
1895                                 __strncpy_null(fslabel, optarg, BTRFS_LABEL_SIZE - 1);
1896                                 break;
1897                         case 'L':
1898                                 copylabel = 1;
1899                                 break;
1900                         case 'p':
1901                                 progress = 1;
1902                                 break;
1903                         case 'O': {
1904                                 char *orig = strdup(optarg);
1905                                 char *tmp = orig;
1906
1907                                 tmp = btrfs_parse_fs_features(tmp, &features);
1908                                 if (tmp) {
1909                                         error("unrecognized filesystem feature: %s",
1910                                                         tmp);
1911                                         free(orig);
1912                                         exit(1);
1913                                 }
1914                                 free(orig);
1915                                 if (features & BTRFS_FEATURE_LIST_ALL) {
1916                                         btrfs_list_all_fs_features(
1917                                                 ~BTRFS_CONVERT_ALLOWED_FEATURES);
1918                                         exit(0);
1919                                 }
1920                                 if (features & ~BTRFS_CONVERT_ALLOWED_FEATURES) {
1921                                         char buf[64];
1922
1923                                         btrfs_parse_features_to_string(buf,
1924                                                 features & ~BTRFS_CONVERT_ALLOWED_FEATURES);
1925                                         error("features not allowed for convert: %s",
1926                                                 buf);
1927                                         exit(1);
1928                                 }
1929
1930                                 break;
1931                                 }
1932                         case GETOPT_VAL_NO_PROGRESS:
1933                                 progress = 0;
1934                                 break;
1935                         case GETOPT_VAL_HELP:
1936                         default:
1937                                 print_usage();
1938                                 return c != GETOPT_VAL_HELP;
1939                 }
1940         }
1941         set_argv0(argv);
1942         if (check_argc_exact(argc - optind, 1)) {
1943                 print_usage();
1944                 return 1;
1945         }
1946
1947         if (rollback && (!datacsum || noxattr || !packing)) {
1948                 fprintf(stderr,
1949                         "Usage error: -d, -i, -n options do not apply to rollback\n");
1950                 usage_error++;
1951         }
1952
1953         if (usage_error) {
1954                 print_usage();
1955                 return 1;
1956         }
1957
1958         file = argv[optind];
1959         ret = check_mounted(file);
1960         if (ret < 0) {
1961                 error("could not check mount status: %s", strerror(-ret));
1962                 return 1;
1963         } else if (ret) {
1964                 error("%s is mounted", file);
1965                 return 1;
1966         }
1967
1968         if (rollback) {
1969                 ret = do_rollback(file);
1970         } else {
1971                 ret = do_convert(file, datacsum, packing, noxattr, nodesize,
1972                                 copylabel, fslabel, progress, features);
1973         }
1974         if (ret)
1975                 return 1;
1976         return 0;
1977 }