aa0d805b1480c87f04c6c620c2836d0f5f9fdde6
[platform/upstream/btrfs-progs.git] / convert / main.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include "kerncompat.h"
20
21 #include <sys/ioctl.h>
22 #include <sys/mount.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <fcntl.h>
28 #include <unistd.h>
29 #include <uuid/uuid.h>
30 #include <linux/limits.h>
31 #include <getopt.h>
32
33 #include "ctree.h"
34 #include "disk-io.h"
35 #include "volumes.h"
36 #include "transaction.h"
37 #include "crc32c.h"
38 #include "utils.h"
39 #include "task-utils.h"
40 #include "help.h"
41 #include "mkfs/common.h"
42 #include "convert/common.h"
43 #include "convert/source-fs.h"
44 #include "fsfeatures.h"
45
46 static void *print_copied_inodes(void *p)
47 {
48         struct task_ctx *priv = p;
49         const char work_indicator[] = { '.', 'o', 'O', 'o' };
50         uint32_t count = 0;
51
52         task_period_start(priv->info, 1000 /* 1s */);
53         while (1) {
54                 count++;
55                 printf("copy inodes [%c] [%10d/%10d]\r",
56                        work_indicator[count % 4], priv->cur_copy_inodes,
57                        priv->max_copy_inodes);
58                 fflush(stdout);
59                 task_period_wait(priv->info);
60         }
61
62         return NULL;
63 }
64
/*
 * Finish the in-place progress line: print a newline and flush stdout.
 *
 * @p is unused.  Always returns 0.
 */
static int after_copied_inodes(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
72
73 static inline int copy_inodes(struct btrfs_convert_context *cctx,
74                               struct btrfs_root *root, int datacsum,
75                               int packing, int noxattr, struct task_ctx *p)
76 {
77         return cctx->convert_ops->copy_inodes(cctx, root, datacsum, packing,
78                                              noxattr, p);
79 }
80
81 static inline void convert_close_fs(struct btrfs_convert_context *cctx)
82 {
83         cctx->convert_ops->close_fs(cctx);
84 }
85
86 static inline int convert_check_state(struct btrfs_convert_context *cctx)
87 {
88         return cctx->convert_ops->check_state(cctx);
89 }
90
91 static int csum_disk_extent(struct btrfs_trans_handle *trans,
92                             struct btrfs_root *root,
93                             u64 disk_bytenr, u64 num_bytes)
94 {
95         u32 blocksize = root->sectorsize;
96         u64 offset;
97         char *buffer;
98         int ret = 0;
99
100         buffer = malloc(blocksize);
101         if (!buffer)
102                 return -ENOMEM;
103         for (offset = 0; offset < num_bytes; offset += blocksize) {
104                 ret = read_disk_extent(root, disk_bytenr + offset,
105                                         blocksize, buffer);
106                 if (ret)
107                         break;
108                 ret = btrfs_csum_file_block(trans,
109                                             root->fs_info->csum_root,
110                                             disk_bytenr + num_bytes,
111                                             disk_bytenr + offset,
112                                             buffer, blocksize);
113                 if (ret)
114                         break;
115         }
116         free(buffer);
117         return ret;
118 }
119
120 static int create_image_file_range(struct btrfs_trans_handle *trans,
121                                       struct btrfs_root *root,
122                                       struct cache_tree *used,
123                                       struct btrfs_inode_item *inode,
124                                       u64 ino, u64 bytenr, u64 *ret_len,
125                                       int datacsum)
126 {
127         struct cache_extent *cache;
128         struct btrfs_block_group_cache *bg_cache;
129         u64 len = *ret_len;
130         u64 disk_bytenr;
131         int i;
132         int ret;
133
134         if (bytenr != round_down(bytenr, root->sectorsize)) {
135                 error("bytenr not sectorsize aligned: %llu",
136                                 (unsigned long long)bytenr);
137                 return -EINVAL;
138         }
139         if (len != round_down(len, root->sectorsize)) {
140                 error("length not sectorsize aligned: %llu",
141                                 (unsigned long long)len);
142                 return -EINVAL;
143         }
144         len = min_t(u64, len, BTRFS_MAX_EXTENT_SIZE);
145
146         /*
147          * Skip sb ranges first
148          * [0, 1M), [sb_offset(1), +64K), [sb_offset(2), +64K].
149          *
150          * Or we will insert a hole into current image file, and later
151          * migrate block will fail as there is already a file extent.
152          */
153         if (bytenr < 1024 * 1024) {
154                 *ret_len = 1024 * 1024 - bytenr;
155                 return 0;
156         }
157         for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
158                 u64 cur = btrfs_sb_offset(i);
159
160                 if (bytenr >= cur && bytenr < cur + BTRFS_STRIPE_LEN) {
161                         *ret_len = cur + BTRFS_STRIPE_LEN - bytenr;
162                         return 0;
163                 }
164         }
165         for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
166                 u64 cur = btrfs_sb_offset(i);
167
168                 /*
169                  *      |--reserved--|
170                  * |----range-------|
171                  * May still need to go through file extent inserts
172                  */
173                 if (bytenr < cur && bytenr + len >= cur) {
174                         len = min_t(u64, len, cur - bytenr);
175                         break;
176                 }
177                 /*
178                  * |--reserved--|
179                  *      |---range---|
180                  * Drop out, no need to insert anything
181                  */
182                 if (bytenr >= cur && bytenr < cur + BTRFS_STRIPE_LEN) {
183                         *ret_len = cur + BTRFS_STRIPE_LEN - bytenr;
184                         return 0;
185                 }
186         }
187
188         cache = search_cache_extent(used, bytenr);
189         if (cache) {
190                 if (cache->start <= bytenr) {
191                         /*
192                          * |///////Used///////|
193                          *      |<--insert--->|
194                          *      bytenr
195                          */
196                         len = min_t(u64, len, cache->start + cache->size -
197                                     bytenr);
198                         disk_bytenr = bytenr;
199                 } else {
200                         /*
201                          *              |//Used//|
202                          *  |<-insert-->|
203                          *  bytenr
204                          */
205                         len = min(len, cache->start - bytenr);
206                         disk_bytenr = 0;
207                         datacsum = 0;
208                 }
209         } else {
210                 /*
211                  * |//Used//|           |EOF
212                  *          |<-insert-->|
213                  *          bytenr
214                  */
215                 disk_bytenr = 0;
216                 datacsum = 0;
217         }
218
219         if (disk_bytenr) {
220                 /* Check if the range is in a data block group */
221                 bg_cache = btrfs_lookup_block_group(root->fs_info, bytenr);
222                 if (!bg_cache)
223                         return -ENOENT;
224                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
225                         return -EINVAL;
226
227                 /* The extent should never cross block group boundary */
228                 len = min_t(u64, len, bg_cache->key.objectid +
229                             bg_cache->key.offset - bytenr);
230         }
231
232         if (len != round_down(len, root->sectorsize)) {
233                 error("remaining length not sectorsize aligned: %llu",
234                                 (unsigned long long)len);
235                 return -EINVAL;
236         }
237         ret = btrfs_record_file_extent(trans, root, ino, inode, bytenr,
238                                        disk_bytenr, len);
239         if (ret < 0)
240                 return ret;
241
242         if (datacsum)
243                 ret = csum_disk_extent(trans, root, bytenr, len);
244         *ret_len = len;
245         return ret;
246 }
247
248 /*
249  * Relocate old fs data in one reserved ranges
250  *
251  * Since all old fs data in reserved range is not covered by any chunk nor
252  * data extent, we don't need to handle any reference but add new
253  * extent/reference, which makes codes more clear
254  */
255 static int migrate_one_reserved_range(struct btrfs_trans_handle *trans,
256                                       struct btrfs_root *root,
257                                       struct cache_tree *used,
258                                       struct btrfs_inode_item *inode, int fd,
259                                       u64 ino, u64 start, u64 len, int datacsum)
260 {
261         u64 cur_off = start;
262         u64 cur_len = len;
263         u64 hole_start = start;
264         u64 hole_len;
265         struct cache_extent *cache;
266         struct btrfs_key key;
267         struct extent_buffer *eb;
268         int ret = 0;
269
270         while (cur_off < start + len) {
271                 cache = lookup_cache_extent(used, cur_off, cur_len);
272                 if (!cache)
273                         break;
274                 cur_off = max(cache->start, cur_off);
275                 cur_len = min(cache->start + cache->size, start + len) -
276                           cur_off;
277                 BUG_ON(cur_len < root->sectorsize);
278
279                 /* reserve extent for the data */
280                 ret = btrfs_reserve_extent(trans, root, cur_len, 0, 0, (u64)-1,
281                                            &key, 1);
282                 if (ret < 0)
283                         break;
284
285                 eb = malloc(sizeof(*eb) + cur_len);
286                 if (!eb) {
287                         ret = -ENOMEM;
288                         break;
289                 }
290
291                 ret = pread(fd, eb->data, cur_len, cur_off);
292                 if (ret < cur_len) {
293                         ret = (ret < 0 ? ret : -EIO);
294                         free(eb);
295                         break;
296                 }
297                 eb->start = key.objectid;
298                 eb->len = key.offset;
299
300                 /* Write the data */
301                 ret = write_and_map_eb(trans, root, eb);
302                 free(eb);
303                 if (ret < 0)
304                         break;
305
306                 /* Now handle extent item and file extent things */
307                 ret = btrfs_record_file_extent(trans, root, ino, inode, cur_off,
308                                                key.objectid, key.offset);
309                 if (ret < 0)
310                         break;
311                 /* Finally, insert csum items */
312                 if (datacsum)
313                         ret = csum_disk_extent(trans, root, key.objectid,
314                                                key.offset);
315
316                 /* Don't forget to insert hole */
317                 hole_len = cur_off - hole_start;
318                 if (hole_len) {
319                         ret = btrfs_record_file_extent(trans, root, ino, inode,
320                                         hole_start, 0, hole_len);
321                         if (ret < 0)
322                                 break;
323                 }
324
325                 cur_off += key.offset;
326                 hole_start = cur_off;
327                 cur_len = start + len - cur_off;
328         }
329         /* Last hole */
330         if (start + len - hole_start > 0)
331                 ret = btrfs_record_file_extent(trans, root, ino, inode,
332                                 hole_start, 0, start + len - hole_start);
333         return ret;
334 }
335
336 /*
337  * Relocate the used ext2 data in reserved ranges
338  * [0,1M)
339  * [btrfs_sb_offset(1), +BTRFS_STRIPE_LEN)
340  * [btrfs_sb_offset(2), +BTRFS_STRIPE_LEN)
341  */
342 static int migrate_reserved_ranges(struct btrfs_trans_handle *trans,
343                                    struct btrfs_root *root,
344                                    struct cache_tree *used,
345                                    struct btrfs_inode_item *inode, int fd,
346                                    u64 ino, u64 total_bytes, int datacsum)
347 {
348         u64 cur_off;
349         u64 cur_len;
350         int ret = 0;
351
352         /* 0 ~ 1M */
353         cur_off = 0;
354         cur_len = 1024 * 1024;
355         ret = migrate_one_reserved_range(trans, root, used, inode, fd, ino,
356                                          cur_off, cur_len, datacsum);
357         if (ret < 0)
358                 return ret;
359
360         /* second sb(fisrt sb is included in 0~1M) */
361         cur_off = btrfs_sb_offset(1);
362         cur_len = min(total_bytes, cur_off + BTRFS_STRIPE_LEN) - cur_off;
363         if (cur_off > total_bytes)
364                 return ret;
365         ret = migrate_one_reserved_range(trans, root, used, inode, fd, ino,
366                                          cur_off, cur_len, datacsum);
367         if (ret < 0)
368                 return ret;
369
370         /* Last sb */
371         cur_off = btrfs_sb_offset(2);
372         cur_len = min(total_bytes, cur_off + BTRFS_STRIPE_LEN) - cur_off;
373         if (cur_off > total_bytes)
374                 return ret;
375         ret = migrate_one_reserved_range(trans, root, used, inode, fd, ino,
376                                          cur_off, cur_len, datacsum);
377         return ret;
378 }
379
380 /*
381  * Helper for expand and merge extent_cache for wipe_one_reserved_range() to
382  * handle wiping a range that exists in cache.
383  */
384 static int _expand_extent_cache(struct cache_tree *tree,
385                                 struct cache_extent *entry,
386                                 u64 min_stripe_size, int backward)
387 {
388         struct cache_extent *ce;
389         int diff;
390
391         if (entry->size >= min_stripe_size)
392                 return 0;
393         diff = min_stripe_size - entry->size;
394
395         if (backward) {
396                 ce = prev_cache_extent(entry);
397                 if (!ce)
398                         goto expand_back;
399                 if (ce->start + ce->size >= entry->start - diff) {
400                         /* Directly merge with previous extent */
401                         ce->size = entry->start + entry->size - ce->start;
402                         remove_cache_extent(tree, entry);
403                         free(entry);
404                         return 0;
405                 }
406 expand_back:
407                 /* No overlap, normal extent */
408                 if (entry->start < diff) {
409                         error("cannot find space for data chunk layout");
410                         return -ENOSPC;
411                 }
412                 entry->start -= diff;
413                 entry->size += diff;
414                 return 0;
415         }
416         ce = next_cache_extent(entry);
417         if (!ce)
418                 goto expand_after;
419         if (entry->start + entry->size + diff >= ce->start) {
420                 /* Directly merge with next extent */
421                 entry->size = ce->start + ce->size - entry->start;
422                 remove_cache_extent(tree, ce);
423                 free(ce);
424                 return 0;
425         }
426 expand_after:
427         entry->size += diff;
428         return 0;
429 }
430
431 /*
432  * Remove one reserve range from given cache tree
433  * if min_stripe_size is non-zero, it will ensure for split case,
434  * all its split cache extent is no smaller than @min_strip_size / 2.
435  */
436 static int wipe_one_reserved_range(struct cache_tree *tree,
437                                    u64 start, u64 len, u64 min_stripe_size,
438                                    int ensure_size)
439 {
440         struct cache_extent *cache;
441         int ret;
442
443         BUG_ON(ensure_size && min_stripe_size == 0);
444         /*
445          * The logical here is simplified to handle special cases only
446          * So we don't need to consider merge case for ensure_size
447          */
448         BUG_ON(min_stripe_size && (min_stripe_size < len * 2 ||
449                min_stripe_size / 2 < BTRFS_STRIPE_LEN));
450
451         /* Also, wipe range should already be aligned */
452         BUG_ON(start != round_down(start, BTRFS_STRIPE_LEN) ||
453                start + len != round_up(start + len, BTRFS_STRIPE_LEN));
454
455         min_stripe_size /= 2;
456
457         cache = lookup_cache_extent(tree, start, len);
458         if (!cache)
459                 return 0;
460
461         if (start <= cache->start) {
462                 /*
463                  *      |--------cache---------|
464                  * |-wipe-|
465                  */
466                 BUG_ON(start + len <= cache->start);
467
468                 /*
469                  * The wipe size is smaller than min_stripe_size / 2,
470                  * so the result length should still meet min_stripe_size
471                  * And no need to do alignment
472                  */
473                 cache->size -= (start + len - cache->start);
474                 if (cache->size == 0) {
475                         remove_cache_extent(tree, cache);
476                         free(cache);
477                         return 0;
478                 }
479
480                 BUG_ON(ensure_size && cache->size < min_stripe_size);
481
482                 cache->start = start + len;
483                 return 0;
484         } else if (start > cache->start && start + len < cache->start +
485                    cache->size) {
486                 /*
487                  * |-------cache-----|
488                  *      |-wipe-|
489                  */
490                 u64 old_start = cache->start;
491                 u64 old_len = cache->size;
492                 u64 insert_start = start + len;
493                 u64 insert_len;
494
495                 cache->size = start - cache->start;
496                 /* Expand the leading half part if needed */
497                 if (ensure_size && cache->size < min_stripe_size) {
498                         ret = _expand_extent_cache(tree, cache,
499                                         min_stripe_size, 1);
500                         if (ret < 0)
501                                 return ret;
502                 }
503
504                 /* And insert the new one */
505                 insert_len = old_start + old_len - start - len;
506                 ret = add_merge_cache_extent(tree, insert_start, insert_len);
507                 if (ret < 0)
508                         return ret;
509
510                 /* Expand the last half part if needed */
511                 if (ensure_size && insert_len < min_stripe_size) {
512                         cache = lookup_cache_extent(tree, insert_start,
513                                                     insert_len);
514                         if (!cache || cache->start != insert_start ||
515                             cache->size != insert_len)
516                                 return -ENOENT;
517                         ret = _expand_extent_cache(tree, cache,
518                                         min_stripe_size, 0);
519                 }
520
521                 return ret;
522         }
523         /*
524          * |----cache-----|
525          *              |--wipe-|
526          * Wipe len should be small enough and no need to expand the
527          * remaining extent
528          */
529         cache->size = start - cache->start;
530         BUG_ON(ensure_size && cache->size < min_stripe_size);
531         return 0;
532 }
533
534 /*
535  * Remove reserved ranges from given cache_tree
536  *
537  * It will remove the following ranges
538  * 1) 0~1M
539  * 2) 2nd superblock, +64K (make sure chunks are 64K aligned)
540  * 3) 3rd superblock, +64K
541  *
542  * @min_stripe must be given for safety check
543  * and if @ensure_size is given, it will ensure affected cache_extent will be
544  * larger than min_stripe_size
545  */
546 static int wipe_reserved_ranges(struct cache_tree *tree, u64 min_stripe_size,
547                                 int ensure_size)
548 {
549         int ret;
550
551         ret = wipe_one_reserved_range(tree, 0, 1024 * 1024, min_stripe_size,
552                                       ensure_size);
553         if (ret < 0)
554                 return ret;
555         ret = wipe_one_reserved_range(tree, btrfs_sb_offset(1),
556                         BTRFS_STRIPE_LEN, min_stripe_size, ensure_size);
557         if (ret < 0)
558                 return ret;
559         ret = wipe_one_reserved_range(tree, btrfs_sb_offset(2),
560                         BTRFS_STRIPE_LEN, min_stripe_size, ensure_size);
561         return ret;
562 }
563
564 static int calculate_available_space(struct btrfs_convert_context *cctx)
565 {
566         struct cache_tree *used = &cctx->used;
567         struct cache_tree *data_chunks = &cctx->data_chunks;
568         struct cache_tree *free = &cctx->free;
569         struct cache_extent *cache;
570         u64 cur_off = 0;
571         /*
572          * Twice the minimal chunk size, to allow later wipe_reserved_ranges()
573          * works without need to consider overlap
574          */
575         u64 min_stripe_size = 2 * 16 * 1024 * 1024;
576         int ret;
577
578         /* Calculate data_chunks */
579         for (cache = first_cache_extent(used); cache;
580              cache = next_cache_extent(cache)) {
581                 u64 cur_len;
582
583                 if (cache->start + cache->size < cur_off)
584                         continue;
585                 if (cache->start > cur_off + min_stripe_size)
586                         cur_off = cache->start;
587                 cur_len = max(cache->start + cache->size - cur_off,
588                               min_stripe_size);
589                 ret = add_merge_cache_extent(data_chunks, cur_off, cur_len);
590                 if (ret < 0)
591                         goto out;
592                 cur_off += cur_len;
593         }
594         /*
595          * remove reserved ranges, so we won't ever bother relocating an old
596          * filesystem extent to other place.
597          */
598         ret = wipe_reserved_ranges(data_chunks, min_stripe_size, 1);
599         if (ret < 0)
600                 goto out;
601
602         cur_off = 0;
603         /*
604          * Calculate free space
605          * Always round up the start bytenr, to avoid metadata extent corss
606          * stripe boundary, as later mkfs_convert() won't have all the extent
607          * allocation check
608          */
609         for (cache = first_cache_extent(data_chunks); cache;
610              cache = next_cache_extent(cache)) {
611                 if (cache->start < cur_off)
612                         continue;
613                 if (cache->start > cur_off) {
614                         u64 insert_start;
615                         u64 len;
616
617                         len = cache->start - round_up(cur_off,
618                                                       BTRFS_STRIPE_LEN);
619                         insert_start = round_up(cur_off, BTRFS_STRIPE_LEN);
620
621                         ret = add_merge_cache_extent(free, insert_start, len);
622                         if (ret < 0)
623                                 goto out;
624                 }
625                 cur_off = cache->start + cache->size;
626         }
627         /* Don't forget the last range */
628         if (cctx->total_bytes > cur_off) {
629                 u64 len = cctx->total_bytes - cur_off;
630                 u64 insert_start;
631
632                 insert_start = round_up(cur_off, BTRFS_STRIPE_LEN);
633
634                 ret = add_merge_cache_extent(free, insert_start, len);
635                 if (ret < 0)
636                         goto out;
637         }
638
639         /* Remove reserved bytes */
640         ret = wipe_reserved_ranges(free, min_stripe_size, 0);
641 out:
642         return ret;
643 }
644
645 /*
646  * Read used space, and since we have the used space,
647  * calcuate data_chunks and free for later mkfs
648  */
649 static int convert_read_used_space(struct btrfs_convert_context *cctx)
650 {
651         int ret;
652
653         ret = cctx->convert_ops->read_used_space(cctx);
654         if (ret)
655                 return ret;
656
657         ret = calculate_available_space(cctx);
658         return ret;
659 }
660
661 /*
662  * Create the fs image file of old filesystem.
663  *
664  * This is completely fs independent as we have cctx->used, only
665  * need to create file extents pointing to all the positions.
666  */
667 static int create_image(struct btrfs_root *root,
668                            struct btrfs_mkfs_config *cfg,
669                            struct btrfs_convert_context *cctx, int fd,
670                            u64 size, char *name, int datacsum)
671 {
672         struct btrfs_inode_item buf;
673         struct btrfs_trans_handle *trans;
674         struct btrfs_path path;
675         struct btrfs_key key;
676         struct cache_extent *cache;
677         struct cache_tree used_tmp;
678         u64 cur;
679         u64 ino;
680         u64 flags = BTRFS_INODE_READONLY;
681         int ret;
682
683         if (!datacsum)
684                 flags |= BTRFS_INODE_NODATASUM;
685
686         trans = btrfs_start_transaction(root, 1);
687         if (!trans)
688                 return -ENOMEM;
689
690         cache_tree_init(&used_tmp);
691         btrfs_init_path(&path);
692
693         ret = btrfs_find_free_objectid(trans, root, BTRFS_FIRST_FREE_OBJECTID,
694                                        &ino);
695         if (ret < 0)
696                 goto out;
697         ret = btrfs_new_inode(trans, root, ino, 0400 | S_IFREG);
698         if (ret < 0)
699                 goto out;
700         ret = btrfs_change_inode_flags(trans, root, ino, flags);
701         if (ret < 0)
702                 goto out;
703         ret = btrfs_add_link(trans, root, ino, BTRFS_FIRST_FREE_OBJECTID, name,
704                              strlen(name), BTRFS_FT_REG_FILE, NULL, 1);
705         if (ret < 0)
706                 goto out;
707
708         key.objectid = ino;
709         key.type = BTRFS_INODE_ITEM_KEY;
710         key.offset = 0;
711
712         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
713         if (ret) {
714                 ret = (ret > 0 ? -ENOENT : ret);
715                 goto out;
716         }
717         read_extent_buffer(path.nodes[0], &buf,
718                         btrfs_item_ptr_offset(path.nodes[0], path.slots[0]),
719                         sizeof(buf));
720         btrfs_release_path(&path);
721
722         /*
723          * Create a new used space cache, which doesn't contain the reserved
724          * range
725          */
726         for (cache = first_cache_extent(&cctx->used); cache;
727              cache = next_cache_extent(cache)) {
728                 ret = add_cache_extent(&used_tmp, cache->start, cache->size);
729                 if (ret < 0)
730                         goto out;
731         }
732         ret = wipe_reserved_ranges(&used_tmp, 0, 0);
733         if (ret < 0)
734                 goto out;
735
736         /*
737          * Start from 1M, as 0~1M is reserved, and create_image_file_range()
738          * can't handle bytenr 0(will consider it as a hole)
739          */
740         cur = 1024 * 1024;
741         while (cur < size) {
742                 u64 len = size - cur;
743
744                 ret = create_image_file_range(trans, root, &used_tmp,
745                                                 &buf, ino, cur, &len, datacsum);
746                 if (ret < 0)
747                         goto out;
748                 cur += len;
749         }
750         /* Handle the reserved ranges */
751         ret = migrate_reserved_ranges(trans, root, &cctx->used, &buf, fd, ino,
752                                       cfg->num_bytes, datacsum);
753
754
755         key.objectid = ino;
756         key.type = BTRFS_INODE_ITEM_KEY;
757         key.offset = 0;
758         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
759         if (ret) {
760                 ret = (ret > 0 ? -ENOENT : ret);
761                 goto out;
762         }
763         btrfs_set_stack_inode_size(&buf, cfg->num_bytes);
764         write_extent_buffer(path.nodes[0], &buf,
765                         btrfs_item_ptr_offset(path.nodes[0], path.slots[0]),
766                         sizeof(buf));
767 out:
768         free_extent_cache_tree(&used_tmp);
769         btrfs_release_path(&path);
770         btrfs_commit_transaction(trans, root);
771         return ret;
772 }
773
774 static struct btrfs_root* link_subvol(struct btrfs_root *root,
775                 const char *base, u64 root_objectid)
776 {
777         struct btrfs_trans_handle *trans;
778         struct btrfs_fs_info *fs_info = root->fs_info;
779         struct btrfs_root *tree_root = fs_info->tree_root;
780         struct btrfs_root *new_root = NULL;
781         struct btrfs_path path;
782         struct btrfs_inode_item *inode_item;
783         struct extent_buffer *leaf;
784         struct btrfs_key key;
785         u64 dirid = btrfs_root_dirid(&root->root_item);
786         u64 index = 2;
787         char buf[BTRFS_NAME_LEN + 1]; /* for snprintf null */
788         int len;
789         int i;
790         int ret;
791
792         len = strlen(base);
793         if (len == 0 || len > BTRFS_NAME_LEN)
794                 return NULL;
795
796         btrfs_init_path(&path);
797         key.objectid = dirid;
798         key.type = BTRFS_DIR_INDEX_KEY;
799         key.offset = (u64)-1;
800
801         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
802         if (ret <= 0) {
803                 error("search for DIR_INDEX dirid %llu failed: %d",
804                                 (unsigned long long)dirid, ret);
805                 goto fail;
806         }
807
808         if (path.slots[0] > 0) {
809                 path.slots[0]--;
810                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
811                 if (key.objectid == dirid && key.type == BTRFS_DIR_INDEX_KEY)
812                         index = key.offset + 1;
813         }
814         btrfs_release_path(&path);
815
816         trans = btrfs_start_transaction(root, 1);
817         if (!trans) {
818                 error("unable to start transaction");
819                 goto fail;
820         }
821
822         key.objectid = dirid;
823         key.offset = 0;
824         key.type =  BTRFS_INODE_ITEM_KEY;
825
826         ret = btrfs_lookup_inode(trans, root, &path, &key, 1);
827         if (ret) {
828                 error("search for INODE_ITEM %llu failed: %d",
829                                 (unsigned long long)dirid, ret);
830                 goto fail;
831         }
832         leaf = path.nodes[0];
833         inode_item = btrfs_item_ptr(leaf, path.slots[0],
834                                     struct btrfs_inode_item);
835
836         key.objectid = root_objectid;
837         key.offset = (u64)-1;
838         key.type = BTRFS_ROOT_ITEM_KEY;
839
840         memcpy(buf, base, len);
841         for (i = 0; i < 1024; i++) {
842                 ret = btrfs_insert_dir_item(trans, root, buf, len,
843                                             dirid, &key, BTRFS_FT_DIR, index);
844                 if (ret != -EEXIST)
845                         break;
846                 len = snprintf(buf, ARRAY_SIZE(buf), "%s%d", base, i);
847                 if (len < 1 || len > BTRFS_NAME_LEN) {
848                         ret = -EINVAL;
849                         break;
850                 }
851         }
852         if (ret)
853                 goto fail;
854
855         btrfs_set_inode_size(leaf, inode_item, len * 2 +
856                              btrfs_inode_size(leaf, inode_item));
857         btrfs_mark_buffer_dirty(leaf);
858         btrfs_release_path(&path);
859
860         /* add the backref first */
861         ret = btrfs_add_root_ref(trans, tree_root, root_objectid,
862                                  BTRFS_ROOT_BACKREF_KEY,
863                                  root->root_key.objectid,
864                                  dirid, index, buf, len);
865         if (ret) {
866                 error("unable to add root backref for %llu: %d",
867                                 root->root_key.objectid, ret);
868                 goto fail;
869         }
870
871         /* now add the forward ref */
872         ret = btrfs_add_root_ref(trans, tree_root, root->root_key.objectid,
873                                  BTRFS_ROOT_REF_KEY, root_objectid,
874                                  dirid, index, buf, len);
875         if (ret) {
876                 error("unable to add root ref for %llu: %d",
877                                 root->root_key.objectid, ret);
878                 goto fail;
879         }
880
881         ret = btrfs_commit_transaction(trans, root);
882         if (ret) {
883                 error("transaction commit failed: %d", ret);
884                 goto fail;
885         }
886
887         new_root = btrfs_read_fs_root(fs_info, &key);
888         if (IS_ERR(new_root)) {
889                 error("unable to fs read root: %lu", PTR_ERR(new_root));
890                 new_root = NULL;
891         }
892 fail:
893         btrfs_init_path(&path);
894         return new_root;
895 }
896
897 static int create_subvol(struct btrfs_trans_handle *trans,
898                          struct btrfs_root *root, u64 root_objectid)
899 {
900         struct extent_buffer *tmp;
901         struct btrfs_root *new_root;
902         struct btrfs_key key;
903         struct btrfs_root_item root_item;
904         int ret;
905
906         ret = btrfs_copy_root(trans, root, root->node, &tmp,
907                               root_objectid);
908         if (ret)
909                 return ret;
910
911         memcpy(&root_item, &root->root_item, sizeof(root_item));
912         btrfs_set_root_bytenr(&root_item, tmp->start);
913         btrfs_set_root_level(&root_item, btrfs_header_level(tmp));
914         btrfs_set_root_generation(&root_item, trans->transid);
915         free_extent_buffer(tmp);
916
917         key.objectid = root_objectid;
918         key.type = BTRFS_ROOT_ITEM_KEY;
919         key.offset = trans->transid;
920         ret = btrfs_insert_root(trans, root->fs_info->tree_root,
921                                 &key, &root_item);
922
923         key.offset = (u64)-1;
924         new_root = btrfs_read_fs_root(root->fs_info, &key);
925         if (!new_root || IS_ERR(new_root)) {
926                 error("unable to fs read root: %lu", PTR_ERR(new_root));
927                 return PTR_ERR(new_root);
928         }
929
930         ret = btrfs_make_root_dir(trans, new_root, BTRFS_FIRST_FREE_OBJECTID);
931
932         return ret;
933 }
934
935 /*
936  * New make_btrfs() has handle system and meta chunks quite well.
937  * So only need to add remaining data chunks.
938  */
939 static int make_convert_data_block_groups(struct btrfs_trans_handle *trans,
940                                           struct btrfs_fs_info *fs_info,
941                                           struct btrfs_mkfs_config *cfg,
942                                           struct btrfs_convert_context *cctx)
943 {
944         struct btrfs_root *extent_root = fs_info->extent_root;
945         struct cache_tree *data_chunks = &cctx->data_chunks;
946         struct cache_extent *cache;
947         u64 max_chunk_size;
948         int ret = 0;
949
950         /*
951          * Don't create data chunk over 10% of the convert device
952          * And for single chunk, don't create chunk larger than 1G.
953          */
954         max_chunk_size = cfg->num_bytes / 10;
955         max_chunk_size = min((u64)(1024 * 1024 * 1024), max_chunk_size);
956         max_chunk_size = round_down(max_chunk_size, extent_root->sectorsize);
957
958         for (cache = first_cache_extent(data_chunks); cache;
959              cache = next_cache_extent(cache)) {
960                 u64 cur = cache->start;
961
962                 while (cur < cache->start + cache->size) {
963                         u64 len;
964                         u64 cur_backup = cur;
965
966                         len = min(max_chunk_size,
967                                   cache->start + cache->size - cur);
968                         ret = btrfs_alloc_data_chunk(trans, extent_root,
969                                         &cur_backup, len,
970                                         BTRFS_BLOCK_GROUP_DATA, 1);
971                         if (ret < 0)
972                                 break;
973                         ret = btrfs_make_block_group(trans, extent_root, 0,
974                                         BTRFS_BLOCK_GROUP_DATA,
975                                         BTRFS_FIRST_CHUNK_TREE_OBJECTID,
976                                         cur, len);
977                         if (ret < 0)
978                                 break;
979                         cur += len;
980                 }
981         }
982         return ret;
983 }
984
985 /*
986  * Init the temp btrfs to a operational status.
987  *
988  * It will fix the extent usage accounting(XXX: Do we really need?) and
989  * insert needed data chunks, to ensure all old fs data extents are covered
990  * by DATA chunks, preventing wrong chunks are allocated.
991  *
992  * And also create convert image subvolume and relocation tree.
993  * (XXX: Not need again?)
994  * But the convert image subvolume is *NOT* linked to fs tree yet.
995  */
996 static int init_btrfs(struct btrfs_mkfs_config *cfg, struct btrfs_root *root,
997                          struct btrfs_convert_context *cctx, int datacsum,
998                          int packing, int noxattr)
999 {
1000         struct btrfs_key location;
1001         struct btrfs_trans_handle *trans;
1002         struct btrfs_fs_info *fs_info = root->fs_info;
1003         int ret;
1004
1005         /*
1006          * Don't alloc any metadata/system chunk, as we don't want
1007          * any meta/sys chunk allcated before all data chunks are inserted.
1008          * Or we screw up the chunk layout just like the old implement.
1009          */
1010         fs_info->avoid_sys_chunk_alloc = 1;
1011         fs_info->avoid_meta_chunk_alloc = 1;
1012         trans = btrfs_start_transaction(root, 1);
1013         if (!trans) {
1014                 error("unable to start transaction");
1015                 ret = -EINVAL;
1016                 goto err;
1017         }
1018         ret = btrfs_fix_block_accounting(trans, root);
1019         if (ret)
1020                 goto err;
1021         ret = make_convert_data_block_groups(trans, fs_info, cfg, cctx);
1022         if (ret)
1023                 goto err;
1024         ret = btrfs_make_root_dir(trans, fs_info->tree_root,
1025                                   BTRFS_ROOT_TREE_DIR_OBJECTID);
1026         if (ret)
1027                 goto err;
1028         memcpy(&location, &root->root_key, sizeof(location));
1029         location.offset = (u64)-1;
1030         ret = btrfs_insert_dir_item(trans, fs_info->tree_root, "default", 7,
1031                                 btrfs_super_root_dir(fs_info->super_copy),
1032                                 &location, BTRFS_FT_DIR, 0);
1033         if (ret)
1034                 goto err;
1035         ret = btrfs_insert_inode_ref(trans, fs_info->tree_root, "default", 7,
1036                                 location.objectid,
1037                                 btrfs_super_root_dir(fs_info->super_copy), 0);
1038         if (ret)
1039                 goto err;
1040         btrfs_set_root_dirid(&fs_info->fs_root->root_item,
1041                              BTRFS_FIRST_FREE_OBJECTID);
1042
1043         /* subvol for fs image file */
1044         ret = create_subvol(trans, root, CONV_IMAGE_SUBVOL_OBJECTID);
1045         if (ret < 0) {
1046                 error("failed to create subvolume image root: %d", ret);
1047                 goto err;
1048         }
1049         /* subvol for data relocation tree */
1050         ret = create_subvol(trans, root, BTRFS_DATA_RELOC_TREE_OBJECTID);
1051         if (ret < 0) {
1052                 error("failed to create DATA_RELOC root: %d", ret);
1053                 goto err;
1054         }
1055
1056         ret = btrfs_commit_transaction(trans, root);
1057         fs_info->avoid_sys_chunk_alloc = 0;
1058         fs_info->avoid_meta_chunk_alloc = 0;
1059 err:
1060         return ret;
1061 }
1062
1063 /*
1064  * Migrate super block to its default position and zero 0 ~ 16k
1065  */
1066 static int migrate_super_block(int fd, u64 old_bytenr)
1067 {
1068         int ret;
1069         struct extent_buffer *buf;
1070         struct btrfs_super_block *super;
1071         u32 len;
1072         u32 bytenr;
1073
1074         buf = malloc(sizeof(*buf) + BTRFS_SUPER_INFO_SIZE);
1075         if (!buf)
1076                 return -ENOMEM;
1077
1078         buf->len = BTRFS_SUPER_INFO_SIZE;
1079         ret = pread(fd, buf->data, BTRFS_SUPER_INFO_SIZE, old_bytenr);
1080         if (ret != BTRFS_SUPER_INFO_SIZE)
1081                 goto fail;
1082
1083         super = (struct btrfs_super_block *)buf->data;
1084         BUG_ON(btrfs_super_bytenr(super) != old_bytenr);
1085         btrfs_set_super_bytenr(super, BTRFS_SUPER_INFO_OFFSET);
1086
1087         csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
1088         ret = pwrite(fd, buf->data, BTRFS_SUPER_INFO_SIZE,
1089                 BTRFS_SUPER_INFO_OFFSET);
1090         if (ret != BTRFS_SUPER_INFO_SIZE)
1091                 goto fail;
1092
1093         ret = fsync(fd);
1094         if (ret)
1095                 goto fail;
1096
1097         memset(buf->data, 0, BTRFS_SUPER_INFO_SIZE);
1098         for (bytenr = 0; bytenr < BTRFS_SUPER_INFO_OFFSET; ) {
1099                 len = BTRFS_SUPER_INFO_OFFSET - bytenr;
1100                 if (len > BTRFS_SUPER_INFO_SIZE)
1101                         len = BTRFS_SUPER_INFO_SIZE;
1102                 ret = pwrite(fd, buf->data, len, bytenr);
1103                 if (ret != len) {
1104                         fprintf(stderr, "unable to zero fill device\n");
1105                         break;
1106                 }
1107                 bytenr += len;
1108         }
1109         ret = 0;
1110         fsync(fd);
1111 fail:
1112         free(buf);
1113         if (ret > 0)
1114                 ret = -1;
1115         return ret;
1116 }
1117
1118 static int prepare_system_chunk_sb(struct btrfs_super_block *super)
1119 {
1120         struct btrfs_chunk *chunk;
1121         struct btrfs_disk_key *key;
1122         u32 sectorsize = btrfs_super_sectorsize(super);
1123
1124         key = (struct btrfs_disk_key *)(super->sys_chunk_array);
1125         chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
1126                                        sizeof(struct btrfs_disk_key));
1127
1128         btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
1129         btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
1130         btrfs_set_disk_key_offset(key, 0);
1131
1132         btrfs_set_stack_chunk_length(chunk, btrfs_super_total_bytes(super));
1133         btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
1134         btrfs_set_stack_chunk_stripe_len(chunk, BTRFS_STRIPE_LEN);
1135         btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
1136         btrfs_set_stack_chunk_io_align(chunk, sectorsize);
1137         btrfs_set_stack_chunk_io_width(chunk, sectorsize);
1138         btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
1139         btrfs_set_stack_chunk_num_stripes(chunk, 1);
1140         btrfs_set_stack_chunk_sub_stripes(chunk, 0);
1141         chunk->stripe.devid = super->dev_item.devid;
1142         btrfs_set_stack_stripe_offset(&chunk->stripe, 0);
1143         memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
1144         btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
1145         return 0;
1146 }
1147
/*
 * Declared extern so that this file only refers to the operations table
 * instead of emitting a tentative definition of its own, which clashes with
 * the real definition when linking with -fno-common.
 * NOTE(review): the definition is expected in the ext2 source-fs
 * implementation - confirm.
 */
extern const struct btrfs_convert_operations ext2_convert_ops;

/* Source filesystem implementations tried, in order, by convert_open_fs() */
static const struct btrfs_convert_operations *convert_operations[] = {
#if BTRFSCONVERT_EXT2
        &ext2_convert_ops,
#endif
};
1155
1156 static int convert_open_fs(const char *devname,
1157                            struct btrfs_convert_context *cctx)
1158 {
1159         int i;
1160
1161         memset(cctx, 0, sizeof(*cctx));
1162
1163         for (i = 0; i < ARRAY_SIZE(convert_operations); i++) {
1164                 int ret = convert_operations[i]->open_fs(cctx, devname);
1165
1166                 if (ret == 0) {
1167                         cctx->convert_ops = convert_operations[i];
1168                         return ret;
1169                 }
1170         }
1171
1172         error("no file system found to convert");
1173         return -1;
1174 }
1175
1176 static int do_convert(const char *devname, int datacsum, int packing,
1177                 int noxattr, u32 nodesize, int copylabel, const char *fslabel,
1178                 int progress, u64 features)
1179 {
1180         int ret;
1181         int fd = -1;
1182         u32 blocksize;
1183         u64 total_bytes;
1184         struct btrfs_root *root;
1185         struct btrfs_root *image_root;
1186         struct btrfs_convert_context cctx;
1187         struct btrfs_key key;
1188         char *subvol_name = NULL;
1189         struct task_ctx ctx;
1190         char features_buf[64];
1191         struct btrfs_mkfs_config mkfs_cfg;
1192
1193         init_convert_context(&cctx);
1194         ret = convert_open_fs(devname, &cctx);
1195         if (ret)
1196                 goto fail;
1197         ret = convert_check_state(&cctx);
1198         if (ret)
1199                 warning(
1200                 "source filesystem is not clean, running filesystem check is recommended");
1201         ret = convert_read_used_space(&cctx);
1202         if (ret)
1203                 goto fail;
1204
1205         blocksize = cctx.blocksize;
1206         total_bytes = (u64)blocksize * (u64)cctx.block_count;
1207         if (blocksize < 4096) {
1208                 error("block size is too small: %u < 4096", blocksize);
1209                 goto fail;
1210         }
1211         if (btrfs_check_nodesize(nodesize, blocksize, features))
1212                 goto fail;
1213         fd = open(devname, O_RDWR);
1214         if (fd < 0) {
1215                 error("unable to open %s: %s", devname, strerror(errno));
1216                 goto fail;
1217         }
1218         btrfs_parse_features_to_string(features_buf, features);
1219         if (features == BTRFS_MKFS_DEFAULT_FEATURES)
1220                 strcat(features_buf, " (default)");
1221
1222         printf("create btrfs filesystem:\n");
1223         printf("\tblocksize: %u\n", blocksize);
1224         printf("\tnodesize:  %u\n", nodesize);
1225         printf("\tfeatures:  %s\n", features_buf);
1226
1227         mkfs_cfg.label = cctx.volume_name;
1228         mkfs_cfg.num_bytes = total_bytes;
1229         mkfs_cfg.nodesize = nodesize;
1230         mkfs_cfg.sectorsize = blocksize;
1231         mkfs_cfg.stripesize = blocksize;
1232         mkfs_cfg.features = features;
1233         /* New convert need these space */
1234         memset(mkfs_cfg.chunk_uuid, 0, BTRFS_UUID_UNPARSED_SIZE);
1235         memset(mkfs_cfg.fs_uuid, 0, BTRFS_UUID_UNPARSED_SIZE);
1236
1237         ret = make_convert_btrfs(fd, &mkfs_cfg, &cctx);
1238         if (ret) {
1239                 error("unable to create initial ctree: %s", strerror(-ret));
1240                 goto fail;
1241         }
1242
1243         root = open_ctree_fd(fd, devname, mkfs_cfg.super_bytenr,
1244                              OPEN_CTREE_WRITES | OPEN_CTREE_FS_PARTIAL);
1245         if (!root) {
1246                 error("unable to open ctree");
1247                 goto fail;
1248         }
1249         ret = init_btrfs(&mkfs_cfg, root, &cctx, datacsum, packing, noxattr);
1250         if (ret) {
1251                 error("unable to setup the root tree: %d", ret);
1252                 goto fail;
1253         }
1254
1255         printf("creating %s image file\n", cctx.convert_ops->name);
1256         ret = asprintf(&subvol_name, "%s_saved", cctx.convert_ops->name);
1257         if (ret < 0) {
1258                 error("memory allocation failure for subvolume name: %s_saved",
1259                         cctx.convert_ops->name);
1260                 goto fail;
1261         }
1262         key.objectid = CONV_IMAGE_SUBVOL_OBJECTID;
1263         key.offset = (u64)-1;
1264         key.type = BTRFS_ROOT_ITEM_KEY;
1265         image_root = btrfs_read_fs_root(root->fs_info, &key);
1266         if (!image_root) {
1267                 error("unable to create image subvolume");
1268                 goto fail;
1269         }
1270         ret = create_image(image_root, &mkfs_cfg, &cctx, fd,
1271                               mkfs_cfg.num_bytes, "image", datacsum);
1272         if (ret) {
1273                 error("failed to create %s/image: %d", subvol_name, ret);
1274                 goto fail;
1275         }
1276
1277         printf("creating btrfs metadata");
1278         ctx.max_copy_inodes = (cctx.inodes_count - cctx.free_inodes_count);
1279         ctx.cur_copy_inodes = 0;
1280
1281         if (progress) {
1282                 ctx.info = task_init(print_copied_inodes, after_copied_inodes,
1283                                      &ctx);
1284                 task_start(ctx.info);
1285         }
1286         ret = copy_inodes(&cctx, root, datacsum, packing, noxattr, &ctx);
1287         if (ret) {
1288                 error("error during copy_inodes %d", ret);
1289                 goto fail;
1290         }
1291         if (progress) {
1292                 task_stop(ctx.info);
1293                 task_deinit(ctx.info);
1294         }
1295
1296         image_root = link_subvol(root, subvol_name, CONV_IMAGE_SUBVOL_OBJECTID);
1297         if (!image_root) {
1298                 error("unable to link subvolume %s", subvol_name);
1299                 goto fail;
1300         }
1301
1302         free(subvol_name);
1303
1304         memset(root->fs_info->super_copy->label, 0, BTRFS_LABEL_SIZE);
1305         if (copylabel == 1) {
1306                 __strncpy_null(root->fs_info->super_copy->label,
1307                                 cctx.volume_name, BTRFS_LABEL_SIZE - 1);
1308                 printf("copy label '%s'\n", root->fs_info->super_copy->label);
1309         } else if (copylabel == -1) {
1310                 strcpy(root->fs_info->super_copy->label, fslabel);
1311                 printf("set label to '%s'\n", fslabel);
1312         }
1313
1314         ret = close_ctree(root);
1315         if (ret) {
1316                 error("close_ctree failed: %d", ret);
1317                 goto fail;
1318         }
1319         convert_close_fs(&cctx);
1320         clean_convert_context(&cctx);
1321
1322         /*
1323          * If this step succeed, we get a mountable btrfs. Otherwise
1324          * the source fs is left unchanged.
1325          */
1326         ret = migrate_super_block(fd, mkfs_cfg.super_bytenr);
1327         if (ret) {
1328                 error("unable to migrate super block: %d", ret);
1329                 goto fail;
1330         }
1331
1332         root = open_ctree_fd(fd, devname, 0,
1333                         OPEN_CTREE_WRITES | OPEN_CTREE_FS_PARTIAL);
1334         if (!root) {
1335                 error("unable to open ctree for finalization");
1336                 goto fail;
1337         }
1338         root->fs_info->finalize_on_close = 1;
1339         close_ctree(root);
1340         close(fd);
1341
1342         printf("conversion complete");
1343         return 0;
1344 fail:
1345         clean_convert_context(&cctx);
1346         if (fd != -1)
1347                 close(fd);
1348         warning(
1349 "an error occurred during conversion, filesystem is partially created but not finalized and not mountable");
1350         return -1;
1351 }
1352
1353 /*
1354  * Check if a non 1:1 mapped chunk can be rolled back.
1355  * For new convert, it's OK while for old convert it's not.
1356  */
1357 static int may_rollback_chunk(struct btrfs_fs_info *fs_info, u64 bytenr)
1358 {
1359         struct btrfs_block_group_cache *bg;
1360         struct btrfs_key key;
1361         struct btrfs_path path;
1362         struct btrfs_root *extent_root = fs_info->extent_root;
1363         u64 bg_start;
1364         u64 bg_end;
1365         int ret;
1366
1367         bg = btrfs_lookup_first_block_group(fs_info, bytenr);
1368         if (!bg)
1369                 return -ENOENT;
1370         bg_start = bg->key.objectid;
1371         bg_end = bg->key.objectid + bg->key.offset;
1372
1373         key.objectid = bg_end;
1374         key.type = BTRFS_METADATA_ITEM_KEY;
1375         key.offset = 0;
1376         btrfs_init_path(&path);
1377
1378         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
1379         if (ret < 0)
1380                 return ret;
1381
1382         while (1) {
1383                 struct btrfs_extent_item *ei;
1384
1385                 ret = btrfs_previous_extent_item(extent_root, &path, bg_start);
1386                 if (ret > 0) {
1387                         ret = 0;
1388                         break;
1389                 }
1390                 if (ret < 0)
1391                         break;
1392
1393                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
1394                 if (key.type == BTRFS_METADATA_ITEM_KEY)
1395                         continue;
1396                 /* Now it's EXTENT_ITEM_KEY only */
1397                 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
1398                                     struct btrfs_extent_item);
1399                 /*
1400                  * Found data extent, means this is old convert must follow 1:1
1401                  * mapping.
1402                  */
1403                 if (btrfs_extent_flags(path.nodes[0], ei)
1404                                 & BTRFS_EXTENT_FLAG_DATA) {
1405                         ret = -EINVAL;
1406                         break;
1407                 }
1408         }
1409         btrfs_release_path(&path);
1410         return ret;
1411 }
1412
1413 static int may_rollback(struct btrfs_root *root)
1414 {
1415         struct btrfs_fs_info *info = root->fs_info;
1416         struct btrfs_multi_bio *multi = NULL;
1417         u64 bytenr;
1418         u64 length;
1419         u64 physical;
1420         u64 total_bytes;
1421         int num_stripes;
1422         int ret;
1423
1424         if (btrfs_super_num_devices(info->super_copy) != 1)
1425                 goto fail;
1426
1427         bytenr = BTRFS_SUPER_INFO_OFFSET;
1428         total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
1429
1430         while (1) {
1431                 ret = btrfs_map_block(&info->mapping_tree, WRITE, bytenr,
1432                                       &length, &multi, 0, NULL);
1433                 if (ret) {
1434                         if (ret == -ENOENT) {
1435                                 /* removed block group at the tail */
1436                                 if (length == (u64)-1)
1437                                         break;
1438
1439                                 /* removed block group in the middle */
1440                                 goto next;
1441                         }
1442                         goto fail;
1443                 }
1444
1445                 num_stripes = multi->num_stripes;
1446                 physical = multi->stripes[0].physical;
1447                 free(multi);
1448
1449                 if (num_stripes != 1) {
1450                         error("num stripes for bytenr %llu is not 1", bytenr);
1451                         goto fail;
1452                 }
1453
1454                 /*
1455                  * Extra check for new convert, as metadata chunk from new
1456                  * convert is much more free than old convert, it doesn't need
1457                  * to do 1:1 mapping.
1458                  */
1459                 if (physical != bytenr) {
1460                         /*
1461                          * Check if it's a metadata chunk and has only metadata
1462                          * extent.
1463                          */
1464                         ret = may_rollback_chunk(info, bytenr);
1465                         if (ret < 0)
1466                                 goto fail;
1467                 }
1468 next:
1469                 bytenr += length;
1470                 if (bytenr >= total_bytes)
1471                         break;
1472         }
1473         return 0;
1474 fail:
1475         return -1;
1476 }
1477
1478 static int do_rollback(const char *devname)
1479 {
1480         int fd = -1;
1481         int ret;
1482         int i;
1483         struct btrfs_root *root;
1484         struct btrfs_root *image_root;
1485         struct btrfs_root *chunk_root;
1486         struct btrfs_dir_item *dir;
1487         struct btrfs_inode_item *inode;
1488         struct btrfs_file_extent_item *fi;
1489         struct btrfs_trans_handle *trans;
1490         struct extent_buffer *leaf;
1491         struct btrfs_block_group_cache *cache1;
1492         struct btrfs_block_group_cache *cache2;
1493         struct btrfs_key key;
1494         struct btrfs_path path;
1495         struct extent_io_tree io_tree;
1496         char *buf = NULL;
1497         char *name;
1498         u64 bytenr;
1499         u64 num_bytes;
1500         u64 root_dir;
1501         u64 objectid;
1502         u64 offset;
1503         u64 start;
1504         u64 end;
1505         u64 sb_bytenr;
1506         u64 first_free;
1507         u64 total_bytes;
1508         u32 sectorsize;
1509
1510         extent_io_tree_init(&io_tree);
1511
1512         fd = open(devname, O_RDWR);
1513         if (fd < 0) {
1514                 error("unable to open %s: %s", devname, strerror(errno));
1515                 goto fail;
1516         }
1517         root = open_ctree_fd(fd, devname, 0, OPEN_CTREE_WRITES);
1518         if (!root) {
1519                 error("unable to open ctree");
1520                 goto fail;
1521         }
1522         ret = may_rollback(root);
1523         if (ret < 0) {
1524                 error("unable to do rollback: %d", ret);
1525                 goto fail;
1526         }
1527
1528         sectorsize = root->sectorsize;
1529         buf = malloc(sectorsize);
1530         if (!buf) {
1531                 error("unable to allocate memory");
1532                 goto fail;
1533         }
1534
1535         btrfs_init_path(&path);
1536
1537         key.objectid = CONV_IMAGE_SUBVOL_OBJECTID;
1538         key.type = BTRFS_ROOT_BACKREF_KEY;
1539         key.offset = BTRFS_FS_TREE_OBJECTID;
1540         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path, 0,
1541                                 0);
1542         btrfs_release_path(&path);
1543         if (ret > 0) {
1544                 error("unable to convert ext2 image subvolume, is it deleted?");
1545                 goto fail;
1546         } else if (ret < 0) {
1547                 error("unable to open ext2_saved, id %llu: %s",
1548                         (unsigned long long)key.objectid, strerror(-ret));
1549                 goto fail;
1550         }
1551
1552         key.objectid = CONV_IMAGE_SUBVOL_OBJECTID;
1553         key.type = BTRFS_ROOT_ITEM_KEY;
1554         key.offset = (u64)-1;
1555         image_root = btrfs_read_fs_root(root->fs_info, &key);
1556         if (!image_root || IS_ERR(image_root)) {
1557                 error("unable to open subvolume %llu: %ld",
1558                         (unsigned long long)key.objectid, PTR_ERR(image_root));
1559                 goto fail;
1560         }
1561
1562         name = "image";
1563         root_dir = btrfs_root_dirid(&root->root_item);
1564         dir = btrfs_lookup_dir_item(NULL, image_root, &path,
1565                                    root_dir, name, strlen(name), 0);
1566         if (!dir || IS_ERR(dir)) {
1567                 error("unable to find file %s: %ld", name, PTR_ERR(dir));
1568                 goto fail;
1569         }
1570         leaf = path.nodes[0];
1571         btrfs_dir_item_key_to_cpu(leaf, dir, &key);
1572         btrfs_release_path(&path);
1573
1574         objectid = key.objectid;
1575
1576         ret = btrfs_lookup_inode(NULL, image_root, &path, &key, 0);
1577         if (ret) {
1578                 error("unable to find inode item: %d", ret);
1579                 goto fail;
1580         }
1581         leaf = path.nodes[0];
1582         inode = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_inode_item);
1583         total_bytes = btrfs_inode_size(leaf, inode);
1584         btrfs_release_path(&path);
1585
1586         key.objectid = objectid;
1587         key.offset = 0;
1588         key.type = BTRFS_EXTENT_DATA_KEY;
1589         ret = btrfs_search_slot(NULL, image_root, &key, &path, 0, 0);
1590         if (ret != 0) {
1591                 error("unable to find first file extent: %d", ret);
1592                 btrfs_release_path(&path);
1593                 goto fail;
1594         }
1595
1596         /* build mapping tree for the relocated blocks */
1597         for (offset = 0; offset < total_bytes; ) {
1598                 leaf = path.nodes[0];
1599                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1600                         ret = btrfs_next_leaf(root, &path);
1601                         if (ret != 0)
1602                                 break;  
1603                         continue;
1604                 }
1605
1606                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1607                 if (key.objectid != objectid || key.offset != offset ||
1608                     key.type != BTRFS_EXTENT_DATA_KEY)
1609                         break;
1610
1611                 fi = btrfs_item_ptr(leaf, path.slots[0],
1612                                     struct btrfs_file_extent_item);
1613                 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
1614                         break;
1615                 if (btrfs_file_extent_compression(leaf, fi) ||
1616                     btrfs_file_extent_encryption(leaf, fi) ||
1617                     btrfs_file_extent_other_encoding(leaf, fi))
1618                         break;
1619
1620                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1621                 /* skip holes and direct mapped extents */
1622                 if (bytenr == 0 || bytenr == offset)
1623                         goto next_extent;
1624
1625                 bytenr += btrfs_file_extent_offset(leaf, fi);
1626                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
1627
1628                 cache1 = btrfs_lookup_block_group(root->fs_info, offset);
1629                 cache2 = btrfs_lookup_block_group(root->fs_info,
1630                                                   offset + num_bytes - 1);
1631                 /*
1632                  * Here we must take consideration of old and new convert
1633                  * behavior.
1634                  * For old convert case, sign, there is no consist chunk type
1635                  * that will cover the extent. META/DATA/SYS are all possible.
1636                  * Just ensure relocate one is in SYS chunk.
1637                  * For new convert case, they are all covered by DATA chunk.
1638                  *
1639                  * So, there is not valid chunk type check for it now.
1640                  */
1641                 if (cache1 != cache2)
1642                         break;
1643
1644                 set_extent_bits(&io_tree, offset, offset + num_bytes - 1,
1645                                 EXTENT_LOCKED, GFP_NOFS);
1646                 set_state_private(&io_tree, offset, bytenr);
1647 next_extent:
1648                 offset += btrfs_file_extent_num_bytes(leaf, fi);
1649                 path.slots[0]++;
1650         }
1651         btrfs_release_path(&path);
1652
1653         if (offset < total_bytes) {
1654                 error("unable to build extent mapping (offset %llu, total_bytes %llu)",
1655                                 (unsigned long long)offset,
1656                                 (unsigned long long)total_bytes);
1657                 error("converted filesystem after balance is unable to rollback");
1658                 goto fail;
1659         }
1660
1661         first_free = BTRFS_SUPER_INFO_OFFSET + 2 * sectorsize - 1;
1662         first_free &= ~((u64)sectorsize - 1);
1663         /* backup for extent #0 should exist */
1664         if(!test_range_bit(&io_tree, 0, first_free - 1, EXTENT_LOCKED, 1)) {
1665                 error("no backup for the first extent");
1666                 goto fail;
1667         }
1668         /* force no allocation from system block group */
1669         root->fs_info->system_allocs = -1;
1670         trans = btrfs_start_transaction(root, 1);
1671         if (!trans) {
1672                 error("unable to start transaction");
1673                 goto fail;
1674         }
1675         /*
1676          * recow the whole chunk tree, this will remove all chunk tree blocks
1677          * from system block group
1678          */
1679         chunk_root = root->fs_info->chunk_root;
1680         memset(&key, 0, sizeof(key));
1681         while (1) {
1682                 ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 1);
1683                 if (ret < 0)
1684                         break;
1685
1686                 ret = btrfs_next_leaf(chunk_root, &path);
1687                 if (ret)
1688                         break;
1689
1690                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
1691                 btrfs_release_path(&path);
1692         }
1693         btrfs_release_path(&path);
1694
1695         offset = 0;
1696         num_bytes = 0;
1697         while(1) {
1698                 cache1 = btrfs_lookup_block_group(root->fs_info, offset);
1699                 if (!cache1)
1700                         break;
1701
1702                 if (cache1->flags & BTRFS_BLOCK_GROUP_SYSTEM)
1703                         num_bytes += btrfs_block_group_used(&cache1->item);
1704
1705                 offset = cache1->key.objectid + cache1->key.offset;
1706         }
1707         /* only extent #0 left in system block group? */
1708         if (num_bytes > first_free) {
1709                 error(
1710         "unable to empty system block group (num_bytes %llu, first_free %llu",
1711                                 (unsigned long long)num_bytes,
1712                                 (unsigned long long)first_free);
1713                 goto fail;
1714         }
1715         /* create a system chunk that maps the whole device */
1716         ret = prepare_system_chunk_sb(root->fs_info->super_copy);
1717         if (ret) {
1718                 error("unable to update system chunk: %d", ret);
1719                 goto fail;
1720         }
1721
1722         ret = btrfs_commit_transaction(trans, root);
1723         if (ret) {
1724                 error("transaction commit failed: %d", ret);
1725                 goto fail;
1726         }
1727
1728         ret = close_ctree(root);
1729         if (ret) {
1730                 error("close_ctree failed: %d", ret);
1731                 goto fail;
1732         }
1733
1734         /* zero btrfs super block mirrors */
1735         memset(buf, 0, sectorsize);
1736         for (i = 1 ; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1737                 bytenr = btrfs_sb_offset(i);
1738                 if (bytenr >= total_bytes)
1739                         break;
1740                 ret = pwrite(fd, buf, sectorsize, bytenr);
1741                 if (ret != sectorsize) {
1742                         error("zeroing superblock mirror %d failed: %d",
1743                                         i, ret);
1744                         goto fail;
1745                 }
1746         }
1747
1748         sb_bytenr = (u64)-1;
1749         /* copy all relocated blocks back */
1750         while(1) {
1751                 ret = find_first_extent_bit(&io_tree, 0, &start, &end,
1752                                             EXTENT_LOCKED);
1753                 if (ret)
1754                         break;
1755
1756                 ret = get_state_private(&io_tree, start, &bytenr);
1757                 BUG_ON(ret);
1758
1759                 clear_extent_bits(&io_tree, start, end, EXTENT_LOCKED,
1760                                   GFP_NOFS);
1761
1762                 while (start <= end) {
1763                         if (start == BTRFS_SUPER_INFO_OFFSET) {
1764                                 sb_bytenr = bytenr;
1765                                 goto next_sector;
1766                         }
1767                         ret = pread(fd, buf, sectorsize, bytenr);
1768                         if (ret < 0) {
1769                                 error("reading superblock at %llu failed: %d",
1770                                                 (unsigned long long)bytenr, ret);
1771                                 goto fail;
1772                         }
1773                         BUG_ON(ret != sectorsize);
1774                         ret = pwrite(fd, buf, sectorsize, start);
1775                         if (ret < 0) {
1776                                 error("writing superblock at %llu failed: %d",
1777                                                 (unsigned long long)start, ret);
1778                                 goto fail;
1779                         }
1780                         BUG_ON(ret != sectorsize);
1781 next_sector:
1782                         start += sectorsize;
1783                         bytenr += sectorsize;
1784                 }
1785         }
1786
1787         ret = fsync(fd);
1788         if (ret < 0) {
1789                 error("fsync failed: %s", strerror(errno));
1790                 goto fail;
1791         }
1792         /*
1793          * finally, overwrite btrfs super block.
1794          */
1795         ret = pread(fd, buf, sectorsize, sb_bytenr);
1796         if (ret < 0) {
1797                 error("reading primary superblock failed: %s",
1798                                 strerror(errno));
1799                 goto fail;
1800         }
1801         BUG_ON(ret != sectorsize);
1802         ret = pwrite(fd, buf, sectorsize, BTRFS_SUPER_INFO_OFFSET);
1803         if (ret < 0) {
1804                 error("writing primary superblock failed: %s",
1805                                 strerror(errno));
1806                 goto fail;
1807         }
1808         BUG_ON(ret != sectorsize);
1809         ret = fsync(fd);
1810         if (ret < 0) {
1811                 error("fsync failed: %s", strerror(errno));
1812                 goto fail;
1813         }
1814
1815         close(fd);
1816         free(buf);
1817         extent_io_tree_cleanup(&io_tree);
1818         printf("rollback complete\n");
1819         return 0;
1820
1821 fail:
1822         if (fd != -1)
1823                 close(fd);
1824         free(buf);
1825         error("rollback aborted");
1826         return -1;
1827 }
1828
1829 static void print_usage(void)
1830 {
1831         printf("usage: btrfs-convert [options] device\n");
1832         printf("options:\n");
1833         printf("\t-d|--no-datasum        disable data checksum, sets NODATASUM\n");
1834         printf("\t-i|--no-xattr          ignore xattrs and ACLs\n");
1835         printf("\t-n|--no-inline         disable inlining of small files to metadata\n");
1836         printf("\t-N|--nodesize SIZE     set filesystem metadata nodesize\n");
1837         printf("\t-r|--rollback          roll back to the original filesystem\n");
1838         printf("\t-l|--label LABEL       set filesystem label\n");
1839         printf("\t-L|--copy-label        use label from converted filesystem\n");
1840         printf("\t-p|--progress          show converting progress (default)\n");
1841         printf("\t-O|--features LIST     comma separated list of filesystem features\n");
1842         printf("\t--no-progress          show only overview, not the detailed progress\n");
1843         printf("\n");
1844         printf("Supported filesystems:\n");
1845         printf("\text2/3/4: %s\n", BTRFSCONVERT_EXT2 ? "yes" : "no");
1846 }
1847
1848 int main(int argc, char *argv[])
1849 {
1850         int ret;
1851         int packing = 1;
1852         int noxattr = 0;
1853         int datacsum = 1;
1854         u32 nodesize = max_t(u32, sysconf(_SC_PAGESIZE),
1855                         BTRFS_MKFS_DEFAULT_NODE_SIZE);
1856         int rollback = 0;
1857         int copylabel = 0;
1858         int usage_error = 0;
1859         int progress = 1;
1860         char *file;
1861         char fslabel[BTRFS_LABEL_SIZE];
1862         u64 features = BTRFS_MKFS_DEFAULT_FEATURES;
1863
1864         while(1) {
1865                 enum { GETOPT_VAL_NO_PROGRESS = 256 };
1866                 static const struct option long_options[] = {
1867                         { "no-progress", no_argument, NULL,
1868                                 GETOPT_VAL_NO_PROGRESS },
1869                         { "no-datasum", no_argument, NULL, 'd' },
1870                         { "no-inline", no_argument, NULL, 'n' },
1871                         { "no-xattr", no_argument, NULL, 'i' },
1872                         { "rollback", no_argument, NULL, 'r' },
1873                         { "features", required_argument, NULL, 'O' },
1874                         { "progress", no_argument, NULL, 'p' },
1875                         { "label", required_argument, NULL, 'l' },
1876                         { "copy-label", no_argument, NULL, 'L' },
1877                         { "nodesize", required_argument, NULL, 'N' },
1878                         { "help", no_argument, NULL, GETOPT_VAL_HELP},
1879                         { NULL, 0, NULL, 0 }
1880                 };
1881                 int c = getopt_long(argc, argv, "dinN:rl:LpO:", long_options, NULL);
1882
1883                 if (c < 0)
1884                         break;
1885                 switch(c) {
1886                         case 'd':
1887                                 datacsum = 0;
1888                                 break;
1889                         case 'i':
1890                                 noxattr = 1;
1891                                 break;
1892                         case 'n':
1893                                 packing = 0;
1894                                 break;
1895                         case 'N':
1896                                 nodesize = parse_size(optarg);
1897                                 break;
1898                         case 'r':
1899                                 rollback = 1;
1900                                 break;
1901                         case 'l':
1902                                 copylabel = -1;
1903                                 if (strlen(optarg) >= BTRFS_LABEL_SIZE) {
1904                                         warning(
1905                                         "label too long, trimmed to %d bytes",
1906                                                 BTRFS_LABEL_SIZE - 1);
1907                                 }
1908                                 __strncpy_null(fslabel, optarg, BTRFS_LABEL_SIZE - 1);
1909                                 break;
1910                         case 'L':
1911                                 copylabel = 1;
1912                                 break;
1913                         case 'p':
1914                                 progress = 1;
1915                                 break;
1916                         case 'O': {
1917                                 char *orig = strdup(optarg);
1918                                 char *tmp = orig;
1919
1920                                 tmp = btrfs_parse_fs_features(tmp, &features);
1921                                 if (tmp) {
1922                                         error("unrecognized filesystem feature: %s",
1923                                                         tmp);
1924                                         free(orig);
1925                                         exit(1);
1926                                 }
1927                                 free(orig);
1928                                 if (features & BTRFS_FEATURE_LIST_ALL) {
1929                                         btrfs_list_all_fs_features(
1930                                                 ~BTRFS_CONVERT_ALLOWED_FEATURES);
1931                                         exit(0);
1932                                 }
1933                                 if (features & ~BTRFS_CONVERT_ALLOWED_FEATURES) {
1934                                         char buf[64];
1935
1936                                         btrfs_parse_features_to_string(buf,
1937                                                 features & ~BTRFS_CONVERT_ALLOWED_FEATURES);
1938                                         error("features not allowed for convert: %s",
1939                                                 buf);
1940                                         exit(1);
1941                                 }
1942
1943                                 break;
1944                                 }
1945                         case GETOPT_VAL_NO_PROGRESS:
1946                                 progress = 0;
1947                                 break;
1948                         case GETOPT_VAL_HELP:
1949                         default:
1950                                 print_usage();
1951                                 return c != GETOPT_VAL_HELP;
1952                 }
1953         }
1954         set_argv0(argv);
1955         if (check_argc_exact(argc - optind, 1)) {
1956                 print_usage();
1957                 return 1;
1958         }
1959
1960         if (rollback && (!datacsum || noxattr || !packing)) {
1961                 fprintf(stderr,
1962                         "Usage error: -d, -i, -n options do not apply to rollback\n");
1963                 usage_error++;
1964         }
1965
1966         if (usage_error) {
1967                 print_usage();
1968                 return 1;
1969         }
1970
1971         file = argv[optind];
1972         ret = check_mounted(file);
1973         if (ret < 0) {
1974                 error("could not check mount status: %s", strerror(-ret));
1975                 return 1;
1976         } else if (ret) {
1977                 error("%s is mounted", file);
1978                 return 1;
1979         }
1980
1981         if (rollback) {
1982                 ret = do_rollback(file);
1983         } else {
1984                 ret = do_convert(file, datacsum, packing, noxattr, nodesize,
1985                                 copylabel, fslabel, progress, features);
1986         }
1987         if (ret)
1988                 return 1;
1989         return 0;
1990 }