btrfs-progs: Turning ON incompat isn't an error
[platform/upstream/btrfs-progs.git] / btrfs-convert.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #define _XOPEN_SOURCE 600
20 #define _GNU_SOURCE 1
21
22 #include "kerncompat.h"
23
24 #include <sys/ioctl.h>
25 #include <sys/mount.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <sys/acl.h>
31 #include <fcntl.h>
32 #include <unistd.h>
33 #include <uuid/uuid.h>
34
35 #include "ctree.h"
36 #include "disk-io.h"
37 #include "volumes.h"
38 #include "transaction.h"
39 #include "crc32c.h"
40 #include "utils.h"
41 #include <ext2fs/ext2_fs.h>
42 #include <ext2fs/ext2fs.h>
43 #include <ext2fs/ext2_ext_attr.h>
44
/*
 * Value added to an ext2 inode number to form the btrfs objectid used for
 * that inode; by construction EXT2_ROOT_INO maps to
 * BTRFS_FIRST_FREE_OBJECTID.
 */
#define INO_OFFSET (BTRFS_FIRST_FREE_OBJECTID - EXT2_ROOT_INO)
/* Granularity (64KiB) to which superblock mirror offsets are rounded down. */
#define STRIPE_LEN (64 * 1024)
/*
 * Objectid of the ext2 image subvolume.
 * NOTE(review): its users are outside this section of the file.
 */
#define EXT2_IMAGE_SUBVOL_OBJECTID BTRFS_FIRST_FREE_OBJECTID
48
49 /*
50  * Open Ext2fs in readonly mode, read block allocation bitmap and
51  * inode bitmap into memory.
52  */
53 static int open_ext2fs(const char *name, ext2_filsys *ret_fs)
54 {
55         errcode_t ret;
56         ext2_filsys ext2_fs;
57         ext2_ino_t ino;
58         ret = ext2fs_open(name, 0, 0, 0, unix_io_manager, &ext2_fs);
59         if (ret) {
60                 fprintf(stderr, "ext2fs_open: %s\n", error_message(ret));
61                 goto fail;
62         }
63         ret = ext2fs_read_inode_bitmap(ext2_fs);
64         if (ret) {
65                 fprintf(stderr, "ext2fs_read_inode_bitmap: %s\n",
66                         error_message(ret));
67                 goto fail;
68         }
69         ret = ext2fs_read_block_bitmap(ext2_fs);
70         if (ret) {
71                 fprintf(stderr, "ext2fs_read_block_bitmap: %s\n",
72                         error_message(ret));
73                 goto fail;
74         }
75         /*
76          * search each block group for a free inode. this set up
77          * uninit block/inode bitmaps appropriately.
78          */
79         ino = 1;
80         while (ino <= ext2_fs->super->s_inodes_count) {
81                 ext2_ino_t foo;
82                 ext2fs_new_inode(ext2_fs, ino, 0, NULL, &foo);
83                 ino += EXT2_INODES_PER_GROUP(ext2_fs->super);
84         }
85
86         *ret_fs = ext2_fs;
87         return 0;
88 fail:
89         return -1;
90 }
91
92 static int close_ext2fs(ext2_filsys fs)
93 {
94         ext2fs_close(fs);
95         return 0;
96 }
97
98 static int ext2_alloc_block(ext2_filsys fs, u64 goal, u64 *block_ret)
99 {
100         blk_t block;
101
102         if (!ext2fs_new_block(fs, goal, NULL, &block)) {
103                 ext2fs_fast_mark_block_bitmap(fs->block_map, block);
104                 *block_ret = block;
105                 return 0;
106         }
107         return -ENOSPC;
108 }
109
110 static int ext2_free_block(ext2_filsys fs, u64 block)
111 {
112         BUG_ON(block != (blk_t)block);
113         ext2fs_fast_unmark_block_bitmap(fs->block_map, block);
114         return 0;
115 }
116
117 static int cache_free_extents(struct btrfs_root *root, ext2_filsys ext2_fs)
118
119 {
120         int i, ret = 0;
121         blk_t block;
122         u64 bytenr;
123         u64 blocksize = ext2_fs->blocksize;
124
125         block = ext2_fs->super->s_first_data_block;
126         for (; block < ext2_fs->super->s_blocks_count; block++) {
127                 if (ext2fs_fast_test_block_bitmap(ext2_fs->block_map, block))
128                         continue;
129                 bytenr = block * blocksize;
130                 ret = set_extent_dirty(&root->fs_info->free_space_cache,
131                                        bytenr, bytenr + blocksize - 1, 0);
132                 BUG_ON(ret);
133         }
134
135         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
136                 bytenr = btrfs_sb_offset(i);
137                 bytenr &= ~((u64)STRIPE_LEN - 1);
138                 if (bytenr >= blocksize * ext2_fs->super->s_blocks_count)
139                         break;
140                 clear_extent_dirty(&root->fs_info->free_space_cache, bytenr,
141                                    bytenr + STRIPE_LEN - 1, 0);
142         }
143
144         clear_extent_dirty(&root->fs_info->free_space_cache,
145                            0, BTRFS_SUPER_INFO_OFFSET - 1, 0);
146
147         return 0;
148 }
149
150 static int custom_alloc_extent(struct btrfs_root *root, u64 num_bytes,
151                                u64 hint_byte, struct btrfs_key *ins)
152 {
153         u64 start;
154         u64 end;
155         u64 last = hint_byte;
156         int ret;
157         int wrapped = 0;
158         struct btrfs_block_group_cache *cache;
159
160         while(1) {
161                 ret = find_first_extent_bit(&root->fs_info->free_space_cache,
162                                             last, &start, &end, EXTENT_DIRTY);
163                 if (ret) {
164                         if (wrapped++ == 0) {
165                                 last = 0;
166                                 continue;
167                         } else {
168                                 goto fail;
169                         }
170                 }
171
172                 start = max(last, start);
173                 last = end + 1;
174                 if (last - start < num_bytes)
175                         continue;
176
177                 last = start + num_bytes;
178                 if (test_range_bit(&root->fs_info->pinned_extents,
179                                    start, last - 1, EXTENT_DIRTY, 0))
180                         continue;
181
182                 cache = btrfs_lookup_block_group(root->fs_info, start);
183                 BUG_ON(!cache);
184                 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM ||
185                     last > cache->key.objectid + cache->key.offset) {
186                         last = cache->key.objectid + cache->key.offset;
187                         continue;
188                 }
189
190                 clear_extent_dirty(&root->fs_info->free_space_cache,
191                                    start, start + num_bytes - 1, 0);
192
193                 ins->objectid = start;
194                 ins->offset = num_bytes;
195                 ins->type = BTRFS_EXTENT_ITEM_KEY;
196                 return 0;
197         }
198 fail:
199         fprintf(stderr, "not enough free space\n");
200         return -ENOSPC;
201 }
202
203 static int intersect_with_sb(u64 bytenr, u64 num_bytes)
204 {
205         int i;
206         u64 offset;
207
208         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
209                 offset = btrfs_sb_offset(i);
210                 offset &= ~((u64)STRIPE_LEN - 1);
211
212                 if (bytenr < offset + STRIPE_LEN &&
213                     bytenr + num_bytes > offset)
214                         return 1;
215         }
216         return 0;
217 }
218
219 static int custom_free_extent(struct btrfs_root *root, u64 bytenr,
220                               u64 num_bytes)
221 {
222         return intersect_with_sb(bytenr, num_bytes);
223 }
224
225 static struct btrfs_extent_ops extent_ops = {
226         .alloc_extent = custom_alloc_extent,
227         .free_extent = custom_free_extent,
228 };
229
230 struct dir_iterate_data {
231         struct btrfs_trans_handle *trans;
232         struct btrfs_root *root;
233         struct btrfs_inode_item *inode;
234         u64 objectid;
235         u64 index_cnt;
236         u64 parent;
237         int errcode;
238 };
239
240 static u8 filetype_conversion_table[EXT2_FT_MAX] = {
241         [EXT2_FT_UNKNOWN]       = BTRFS_FT_UNKNOWN,
242         [EXT2_FT_REG_FILE]      = BTRFS_FT_REG_FILE,
243         [EXT2_FT_DIR]           = BTRFS_FT_DIR,
244         [EXT2_FT_CHRDEV]        = BTRFS_FT_CHRDEV,
245         [EXT2_FT_BLKDEV]        = BTRFS_FT_BLKDEV,
246         [EXT2_FT_FIFO]          = BTRFS_FT_FIFO,
247         [EXT2_FT_SOCK]          = BTRFS_FT_SOCK,
248         [EXT2_FT_SYMLINK]       = BTRFS_FT_SYMLINK,
249 };
250
251 static int dir_iterate_proc(ext2_ino_t dir, int entry,
252                             struct ext2_dir_entry *dirent,
253                             int offset, int blocksize,
254                             char *buf,void *priv_data)
255 {
256         int ret;
257         int file_type;
258         u64 objectid;
259         u64 inode_size;
260         char dotdot[] = "..";
261         struct btrfs_key location;
262         struct dir_iterate_data *idata = (struct dir_iterate_data *)priv_data;
263         int name_len;
264
265         name_len = dirent->name_len & 0xFF;
266
267         objectid = dirent->inode + INO_OFFSET;
268         if (!strncmp(dirent->name, dotdot, name_len)) {
269                 if (name_len == 2) {
270                         BUG_ON(idata->parent != 0);
271                         idata->parent = objectid;
272                 }
273                 return 0;
274         }
275         if (dirent->inode < EXT2_GOOD_OLD_FIRST_INO)
276                 return 0;
277
278         location.objectid = objectid;
279         location.offset = 0;
280         btrfs_set_key_type(&location, BTRFS_INODE_ITEM_KEY);
281
282         file_type = dirent->name_len >> 8;
283         BUG_ON(file_type > EXT2_FT_SYMLINK);
284         ret = btrfs_insert_dir_item(idata->trans, idata->root,
285                                     dirent->name, name_len,
286                                     idata->objectid, &location,
287                                     filetype_conversion_table[file_type],
288                                     idata->index_cnt);
289         if (ret)
290                 goto fail;
291         ret = btrfs_insert_inode_ref(idata->trans, idata->root,
292                                      dirent->name, name_len,
293                                      objectid, idata->objectid,
294                                      idata->index_cnt);
295         if (ret)
296                 goto fail;
297         idata->index_cnt++;
298         inode_size = btrfs_stack_inode_size(idata->inode) +
299                      name_len * 2;
300         btrfs_set_stack_inode_size(idata->inode, inode_size);
301         return 0;
302 fail:
303         idata->errcode = ret;
304         return BLOCK_ABORT;
305 }
306
307 static int create_dir_entries(struct btrfs_trans_handle *trans,
308                               struct btrfs_root *root, u64 objectid,
309                               struct btrfs_inode_item *btrfs_inode,
310                               ext2_filsys ext2_fs, ext2_ino_t ext2_ino)
311 {
312         int ret;
313         errcode_t err;
314         struct dir_iterate_data data = {
315                 .trans          = trans,
316                 .root           = root,
317                 .inode          = btrfs_inode,
318                 .objectid       = objectid,
319                 .index_cnt      = 2,
320                 .parent         = 0,
321                 .errcode        = 0,
322         };
323
324         err = ext2fs_dir_iterate2(ext2_fs, ext2_ino, 0, NULL,
325                                   dir_iterate_proc, &data);
326         if (err)
327                 goto error;
328         ret = data.errcode;
329         if (ret == 0 && data.parent == objectid) {
330                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
331                                              objectid, objectid, 0);
332         }
333         return ret;
334 error:
335         fprintf(stderr, "ext2fs_dir_iterate2: %s\n", error_message(err));
336         return -1;
337 }
338
339 static int read_disk_extent(struct btrfs_root *root, u64 bytenr,
340                             u32 num_bytes, char *buffer)
341 {
342         int ret;
343         struct btrfs_fs_devices *fs_devs = root->fs_info->fs_devices;
344
345         ret = pread(fs_devs->latest_bdev, buffer, num_bytes, bytenr);
346         if (ret != num_bytes)
347                 goto fail;
348         ret = 0;
349 fail:
350         if (ret > 0)
351                 ret = -1;
352         return ret;
353 }
354
355 static int csum_disk_extent(struct btrfs_trans_handle *trans,
356                             struct btrfs_root *root,
357                             u64 disk_bytenr, u64 num_bytes)
358 {
359         u32 blocksize = root->sectorsize;
360         u64 offset;
361         char *buffer;
362         int ret = 0;
363
364         buffer = malloc(blocksize);
365         if (!buffer)
366                 return -ENOMEM;
367         for (offset = 0; offset < num_bytes; offset += blocksize) {
368                 ret = read_disk_extent(root, disk_bytenr + offset,
369                                         blocksize, buffer);
370                 if (ret)
371                         break;
372                 ret = btrfs_csum_file_block(trans,
373                                             root->fs_info->csum_root,
374                                             disk_bytenr + num_bytes,
375                                             disk_bytenr + offset,
376                                             buffer, blocksize);
377                 if (ret)
378                         break;
379         }
380         free(buffer);
381         return ret;
382 }
383
384 static int record_file_blocks(struct btrfs_trans_handle *trans,
385                               struct btrfs_root *root, u64 objectid,
386                               struct btrfs_inode_item *inode,
387                               u64 file_block, u64 disk_block,
388                               u64 num_blocks, int checksum)
389 {
390         int ret;
391         u64 file_pos = file_block * root->sectorsize;
392         u64 disk_bytenr = disk_block * root->sectorsize;
393         u64 num_bytes = num_blocks * root->sectorsize;
394         ret = btrfs_record_file_extent(trans, root, objectid, inode, file_pos,
395                                         disk_bytenr, num_bytes);
396
397         if (ret || !checksum)
398                 return ret;
399
400         return csum_disk_extent(trans, root, disk_bytenr, num_bytes);
401 }
402
403 struct blk_iterate_data {
404         struct btrfs_trans_handle *trans;
405         struct btrfs_root *root;
406         struct btrfs_inode_item *inode;
407         u64 objectid;
408         u64 first_block;
409         u64 disk_block;
410         u64 num_blocks;
411         u64 boundary;
412         int checksum;
413         int errcode;
414 };
415
416 static int block_iterate_proc(ext2_filsys ext2_fs,
417                               u64 disk_block, u64 file_block,
418                               struct blk_iterate_data *idata)
419 {
420         int ret;
421         int sb_region;
422         int do_barrier;
423         struct btrfs_root *root = idata->root;
424         struct btrfs_trans_handle *trans = idata->trans;
425         struct btrfs_block_group_cache *cache;
426         u64 bytenr = disk_block * root->sectorsize;
427
428         sb_region = intersect_with_sb(bytenr, root->sectorsize);
429         do_barrier = sb_region || disk_block >= idata->boundary;
430         if ((idata->num_blocks > 0 && do_barrier) ||
431             (file_block > idata->first_block + idata->num_blocks) ||
432             (disk_block != idata->disk_block + idata->num_blocks)) {
433                 if (idata->num_blocks > 0) {
434                         ret = record_file_blocks(trans, root, idata->objectid,
435                                         idata->inode, idata->first_block,
436                                         idata->disk_block, idata->num_blocks,
437                                         idata->checksum);
438                         if (ret)
439                                 goto fail;
440                         idata->first_block += idata->num_blocks;
441                         idata->num_blocks = 0;
442                 }
443                 if (file_block > idata->first_block) {
444                         ret = record_file_blocks(trans, root, idata->objectid,
445                                         idata->inode, idata->first_block,
446                                         0, file_block - idata->first_block,
447                                         idata->checksum);
448                         if (ret)
449                                 goto fail;
450                 }
451
452                 if (sb_region) {
453                         bytenr += STRIPE_LEN - 1;
454                         bytenr &= ~((u64)STRIPE_LEN - 1);
455                 } else {
456                         cache = btrfs_lookup_block_group(root->fs_info, bytenr);
457                         BUG_ON(!cache);
458                         bytenr = cache->key.objectid + cache->key.offset;
459                 }
460
461                 idata->first_block = file_block;
462                 idata->disk_block = disk_block;
463                 idata->boundary = bytenr / root->sectorsize;
464         }
465         idata->num_blocks++;
466         return 0;
467 fail:
468         idata->errcode = ret;
469         return BLOCK_ABORT;
470 }
471
472 static int __block_iterate_proc(ext2_filsys fs, blk_t *blocknr,
473                                 e2_blkcnt_t blockcnt, blk_t ref_block,
474                                 int ref_offset, void *priv_data)
475 {
476         struct blk_iterate_data *idata;
477         idata = (struct blk_iterate_data *)priv_data;
478         return block_iterate_proc(fs, *blocknr, blockcnt, idata);
479 }
480
481 /*
482  * traverse file's data blocks, record these data blocks as file extents.
483  */
484 static int create_file_extents(struct btrfs_trans_handle *trans,
485                                struct btrfs_root *root, u64 objectid,
486                                struct btrfs_inode_item *btrfs_inode,
487                                ext2_filsys ext2_fs, ext2_ino_t ext2_ino,
488                                int datacsum, int packing)
489 {
490         int ret;
491         char *buffer = NULL;
492         errcode_t err;
493         u32 last_block;
494         u32 sectorsize = root->sectorsize;
495         u64 inode_size = btrfs_stack_inode_size(btrfs_inode);
496         struct blk_iterate_data data = {
497                 .trans          = trans,
498                 .root           = root,
499                 .inode          = btrfs_inode,
500                 .objectid       = objectid,
501                 .first_block    = 0,
502                 .disk_block     = 0,
503                 .num_blocks     = 0,
504                 .boundary       = (u64)-1,
505                 .checksum       = datacsum,
506                 .errcode        = 0,
507         };
508         err = ext2fs_block_iterate2(ext2_fs, ext2_ino, BLOCK_FLAG_DATA_ONLY,
509                                     NULL, __block_iterate_proc, &data);
510         if (err)
511                 goto error;
512         ret = data.errcode;
513         if (ret)
514                 goto fail;
515         if (packing && data.first_block == 0 && data.num_blocks > 0 &&
516             inode_size <= BTRFS_MAX_INLINE_DATA_SIZE(root)) {
517                 u64 num_bytes = data.num_blocks * sectorsize;
518                 u64 disk_bytenr = data.disk_block * sectorsize;
519                 u64 nbytes;
520
521                 buffer = malloc(num_bytes);
522                 if (!buffer)
523                         return -ENOMEM;
524                 ret = read_disk_extent(root, disk_bytenr, num_bytes, buffer);
525                 if (ret)
526                         goto fail;
527                 if (num_bytes > inode_size)
528                         num_bytes = inode_size;
529                 ret = btrfs_insert_inline_extent(trans, root, objectid,
530                                                  0, buffer, num_bytes);
531                 if (ret)
532                         goto fail;
533                 nbytes = btrfs_stack_inode_nbytes(btrfs_inode) + num_bytes;
534                 btrfs_set_stack_inode_nbytes(btrfs_inode, nbytes);
535         } else if (data.num_blocks > 0) {
536                 ret = record_file_blocks(trans, root, objectid, btrfs_inode,
537                                          data.first_block, data.disk_block,
538                                          data.num_blocks, data.checksum);
539                 if (ret)
540                         goto fail;
541         }
542         data.first_block += data.num_blocks;
543         last_block = (inode_size + sectorsize - 1) / sectorsize;
544         if (last_block > data.first_block) {
545                 ret = record_file_blocks(trans, root, objectid, btrfs_inode,
546                                          data.first_block, 0, last_block -
547                                          data.first_block, data.checksum);
548         }
549 fail:
550         if (buffer)
551                 free(buffer);
552         return ret;
553 error:
554         fprintf(stderr, "ext2fs_block_iterate2: %s\n", error_message(err));
555         return -1;
556 }
557
558 static int create_symbol_link(struct btrfs_trans_handle *trans,
559                               struct btrfs_root *root, u64 objectid,
560                               struct btrfs_inode_item *btrfs_inode,
561                               ext2_filsys ext2_fs, ext2_ino_t ext2_ino,
562                               struct ext2_inode *ext2_inode)
563 {
564         int ret;
565         char *pathname;
566         u64 inode_size = btrfs_stack_inode_size(btrfs_inode);
567         if (ext2fs_inode_data_blocks(ext2_fs, ext2_inode)) {
568                 btrfs_set_stack_inode_size(btrfs_inode, inode_size + 1);
569                 ret = create_file_extents(trans, root, objectid, btrfs_inode,
570                                           ext2_fs, ext2_ino, 1, 1);
571                 btrfs_set_stack_inode_size(btrfs_inode, inode_size);
572                 return ret;
573         }
574
575         pathname = (char *)&(ext2_inode->i_block[0]);
576         BUG_ON(pathname[inode_size] != 0);
577         ret = btrfs_insert_inline_extent(trans, root, objectid, 0,
578                                          pathname, inode_size + 1);
579         btrfs_set_stack_inode_nbytes(btrfs_inode, inode_size + 1);
580         return ret;
581 }
582
/*
 * The xattr/acl code that follows is based on the code in
 * fs/ext3/xattr.c and fs/ext3/acl.c
 */

/* View the start of an xattr block buffer as its header. */
#define EXT2_XATTR_BHDR(ptr) \
        ((struct ext2_ext_attr_header *)(ptr))
/* First entry of an xattr block: directly behind the block header. */
#define EXT2_XATTR_BFIRST(ptr) \
        ((struct ext2_ext_attr_entry *)(EXT2_XATTR_BHDR(ptr) + 1))
/*
 * In-inode xattr header: located EXT2_GOOD_OLD_INODE_SIZE plus
 * i_extra_isize bytes from the start of the inode.
 */
#define EXT2_XATTR_IHDR(inode) \
        ((struct ext2_ext_attr_header *)((char *)(inode) + \
                EXT2_GOOD_OLD_INODE_SIZE + (inode)->i_extra_isize))
/* First in-inode entry: directly behind the header's h_magic field. */
#define EXT2_XATTR_IFIRST(inode) \
        ((struct ext2_ext_attr_entry *)((char *)EXT2_XATTR_IHDR(inode) + \
                sizeof(EXT2_XATTR_IHDR(inode)->h_magic)))
596
/*
 * Walk the xattr entry list starting at @entry and verify that every
 * successor entry begins before @end.
 *
 * Returns 0 when the terminating entry is reached, -EIO if an entry would
 * start at or beyond @end.
 */
static int ext2_xattr_check_names(struct ext2_ext_attr_entry *entry,
                                  const void *end)
{
        struct ext2_ext_attr_entry *cur;

        for (cur = entry; !EXT2_EXT_IS_LAST_ENTRY(cur); ) {
                cur = EXT2_EXT_ATTR_NEXT(cur);
                if ((void *)cur >= end)
                        return -EIO;
        }
        return 0;
}
610
611 static int ext2_xattr_check_block(const char *buf, size_t size)
612 {
613         int error;
614         struct ext2_ext_attr_header *header = EXT2_XATTR_BHDR(buf);
615
616         if (header->h_magic != EXT2_EXT_ATTR_MAGIC ||
617             header->h_blocks != 1)
618                 return -EIO;
619         error = ext2_xattr_check_names(EXT2_XATTR_BFIRST(buf), buf + size);
620         return error;
621 }
622
623 static int ext2_xattr_check_entry(struct ext2_ext_attr_entry *entry,
624                                   size_t size)
625 {
626         size_t value_size = entry->e_value_size;
627
628         if (entry->e_value_block != 0 || value_size > size ||
629             entry->e_value_offs + value_size > size)
630                 return -EIO;
631         return 0;
632 }
633
634 #define EXT2_ACL_VERSION        0x0001
635
636 typedef struct {
637         __le16          e_tag;
638         __le16          e_perm;
639         __le32          e_id;
640 } ext2_acl_entry;
641
642 typedef struct {
643         __le16          e_tag;
644         __le16          e_perm;
645 } ext2_acl_entry_short;
646
647 typedef struct {
648         __le32          a_version;
649 } ext2_acl_header;
650
651 static inline int ext2_acl_count(size_t size)
652 {
653         ssize_t s;
654         size -= sizeof(ext2_acl_header);
655         s = size - 4 * sizeof(ext2_acl_entry_short);
656         if (s < 0) {
657                 if (size % sizeof(ext2_acl_entry_short))
658                         return -1;
659                 return size / sizeof(ext2_acl_entry_short);
660         } else {
661                 if (s % sizeof(ext2_acl_entry))
662                         return -1;
663                 return s / sizeof(ext2_acl_entry) + 4;
664         }
665 }
666
667 #define ACL_EA_VERSION          0x0002
668
669 typedef struct {
670         __le16          e_tag;
671         __le16          e_perm;
672         __le32          e_id;
673 } acl_ea_entry;
674
675 typedef struct {
676         __le32          a_version;
677         acl_ea_entry    a_entries[0];
678 } acl_ea_header;
679
680 static inline size_t acl_ea_size(int count)
681 {
682         return sizeof(acl_ea_header) + count * sizeof(acl_ea_entry);
683 }
684
685 static int ext2_acl_to_xattr(void *dst, const void *src,
686                              size_t dst_size, size_t src_size)
687 {
688         int i, count;
689         const void *end = src + src_size;
690         acl_ea_header *ext_acl = (acl_ea_header *)dst;
691         acl_ea_entry *dst_entry = ext_acl->a_entries;
692         ext2_acl_entry *src_entry;
693
694         if (src_size < sizeof(ext2_acl_header))
695                 goto fail;
696         if (((ext2_acl_header *)src)->a_version !=
697             cpu_to_le32(EXT2_ACL_VERSION))
698                 goto fail;
699         src += sizeof(ext2_acl_header);
700         count = ext2_acl_count(src_size);
701         if (count <= 0)
702                 goto fail;
703
704         BUG_ON(dst_size < acl_ea_size(count));
705         ext_acl->a_version = cpu_to_le32(ACL_EA_VERSION);
706         for (i = 0; i < count; i++, dst_entry++) {
707                 src_entry = (ext2_acl_entry *)src;
708                 if (src + sizeof(ext2_acl_entry_short) > end)
709                         goto fail;
710                 dst_entry->e_tag = src_entry->e_tag;
711                 dst_entry->e_perm = src_entry->e_perm;
712                 switch (le16_to_cpu(src_entry->e_tag)) {
713                 case ACL_USER_OBJ:
714                 case ACL_GROUP_OBJ:
715                 case ACL_MASK:
716                 case ACL_OTHER:
717                         src += sizeof(ext2_acl_entry_short);
718                         dst_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID);
719                         break;
720                 case ACL_USER:
721                 case ACL_GROUP:
722                         src += sizeof(ext2_acl_entry);
723                         if (src > end)
724                                 goto fail;
725                         dst_entry->e_id = src_entry->e_id;
726                         break;
727                 default:
728                         goto fail;
729                 }
730         }
731         if (src != end)
732                 goto fail;
733         return 0;
734 fail:
735         return -EINVAL;
736 }
737
/*
 * xattr name prefixes, indexed by the e_name_index stored in an ext2 xattr
 * entry.  Slots without an initializer are NULL and mean "not supported".
 * Both the table and the strings are read-only.
 */
static const char * const xattr_prefix_table[] = {
        [1] =   "user.",
        [2] =   "system.posix_acl_access",
        [3] =   "system.posix_acl_default",
        [4] =   "trusted.",
        [6] =   "security.",
};
745
746 static int copy_single_xattr(struct btrfs_trans_handle *trans,
747                              struct btrfs_root *root, u64 objectid,
748                              struct ext2_ext_attr_entry *entry,
749                              const void *data, u32 datalen)
750 {
751         int ret = 0;
752         int name_len;
753         int name_index;
754         void *databuf = NULL;
755         char namebuf[XATTR_NAME_MAX + 1];
756
757         name_index = entry->e_name_index;
758         if (name_index >= ARRAY_SIZE(xattr_prefix_table) ||
759             xattr_prefix_table[name_index] == NULL)
760                 return -EOPNOTSUPP;
761         name_len = strlen(xattr_prefix_table[name_index]) +
762                    entry->e_name_len;
763         if (name_len >= sizeof(namebuf))
764                 return -ERANGE;
765
766         if (name_index == 2 || name_index == 3) {
767                 size_t bufsize = acl_ea_size(ext2_acl_count(datalen));
768                 databuf = malloc(bufsize);
769                 if (!databuf)
770                        return -ENOMEM;
771                 ret = ext2_acl_to_xattr(databuf, data, bufsize, datalen);
772                 if (ret)
773                         goto out;
774                 data = databuf;
775                 datalen = bufsize;
776         }
777         strncpy(namebuf, xattr_prefix_table[name_index], XATTR_NAME_MAX);
778         strncat(namebuf, EXT2_EXT_ATTR_NAME(entry), entry->e_name_len);
779         if (name_len + datalen > BTRFS_LEAF_DATA_SIZE(root) -
780             sizeof(struct btrfs_item) - sizeof(struct btrfs_dir_item)) {
781                 fprintf(stderr, "skip large xattr on inode %Lu name %.*s\n",
782                         objectid - INO_OFFSET, name_len, namebuf);
783                 goto out;
784         }
785         ret = btrfs_insert_xattr_item(trans, root, namebuf, name_len,
786                                       data, datalen, objectid);
787 out:
788         if (databuf)
789                 free(databuf);
790         return ret;
791 }
792
793 static int copy_extended_attrs(struct btrfs_trans_handle *trans,
794                                struct btrfs_root *root, u64 objectid,
795                                struct btrfs_inode_item *btrfs_inode,
796                                ext2_filsys ext2_fs, ext2_ino_t ext2_ino)
797 {
798         int ret = 0;
799         int inline_ea = 0;
800         errcode_t err;
801         u32 datalen;
802         u32 block_size = ext2_fs->blocksize;
803         u32 inode_size = EXT2_INODE_SIZE(ext2_fs->super);
804         struct ext2_inode_large *ext2_inode;
805         struct ext2_ext_attr_entry *entry;
806         void *data;
807         char *buffer = NULL;
808         char inode_buf[EXT2_GOOD_OLD_INODE_SIZE];
809
810         if (inode_size <= EXT2_GOOD_OLD_INODE_SIZE) {
811                 ext2_inode = (struct ext2_inode_large *)inode_buf;
812         } else {
813                 ext2_inode = (struct ext2_inode_large *)malloc(inode_size);
814                 if (!ext2_inode)
815                        return -ENOMEM;
816         }
817         err = ext2fs_read_inode_full(ext2_fs, ext2_ino, (void *)ext2_inode,
818                                      inode_size);
819         if (err) {
820                 fprintf(stderr, "ext2fs_read_inode_full: %s\n",
821                         error_message(err));
822                 ret = -1;
823                 goto out;
824         }
825
826         if (ext2_ino > ext2_fs->super->s_first_ino &&
827             inode_size > EXT2_GOOD_OLD_INODE_SIZE) {
828                 if (EXT2_GOOD_OLD_INODE_SIZE +
829                     ext2_inode->i_extra_isize > inode_size) {
830                         ret = -EIO;
831                         goto out;
832                 }
833                 if (ext2_inode->i_extra_isize != 0 &&
834                     EXT2_XATTR_IHDR(ext2_inode)->h_magic ==
835                     EXT2_EXT_ATTR_MAGIC) {
836                         inline_ea = 1;
837                 }
838         }
839         if (inline_ea) {
840                 int total;
841                 void *end = (void *)ext2_inode + inode_size;
842                 entry = EXT2_XATTR_IFIRST(ext2_inode);
843                 total = end - (void *)entry;
844                 ret = ext2_xattr_check_names(entry, end);
845                 if (ret)
846                         goto out;
847                 while (!EXT2_EXT_IS_LAST_ENTRY(entry)) {
848                         ret = ext2_xattr_check_entry(entry, total);
849                         if (ret)
850                                 goto out;
851                         data = (void *)EXT2_XATTR_IFIRST(ext2_inode) +
852                                 entry->e_value_offs;
853                         datalen = entry->e_value_size;
854                         ret = copy_single_xattr(trans, root, objectid,
855                                                 entry, data, datalen);
856                         if (ret)
857                                 goto out;
858                         entry = EXT2_EXT_ATTR_NEXT(entry);
859                 }
860         }
861
862         if (ext2_inode->i_file_acl == 0)
863                 goto out;
864
865         buffer = malloc(block_size);
866         if (!buffer) {
867                 ret = -ENOMEM;
868                 goto out;
869         }
870         err = ext2fs_read_ext_attr(ext2_fs, ext2_inode->i_file_acl, buffer);
871         if (err) {
872                 fprintf(stderr, "ext2fs_read_ext_attr: %s\n",
873                         error_message(err));
874                 ret = -1;
875                 goto out;
876         }
877         ret = ext2_xattr_check_block(buffer, block_size);
878         if (ret)
879                 goto out;
880
881         entry = EXT2_XATTR_BFIRST(buffer);
882         while (!EXT2_EXT_IS_LAST_ENTRY(entry)) {
883                 ret = ext2_xattr_check_entry(entry, block_size);
884                 if (ret)
885                         goto out;
886                 data = buffer + entry->e_value_offs;
887                 datalen = entry->e_value_size;
888                 ret = copy_single_xattr(trans, root, objectid,
889                                         entry, data, datalen);
890                 if (ret)
891                         goto out;
892                 entry = EXT2_EXT_ATTR_NEXT(entry);
893         }
894 out:
895         if (buffer != NULL)
896                 free(buffer);
897         if ((void *)ext2_inode != inode_buf)
898                 free(ext2_inode);
899         return ret;
900 }
901 #define MINORBITS       20
902 #define MKDEV(ma, mi)   (((ma) << MINORBITS) | (mi))
903
904 static inline dev_t old_decode_dev(u16 val)
905 {
906         return MKDEV((val >> 8) & 255, val & 255);
907 }
908
909 static inline dev_t new_decode_dev(u32 dev)
910 {
911         unsigned major = (dev & 0xfff00) >> 8;
912         unsigned minor = (dev & 0xff) | ((dev >> 12) & 0xfff00);
913         return MKDEV(major, minor);
914 }
915
916 static int copy_inode_item(struct btrfs_inode_item *dst,
917                            struct ext2_inode *src, u32 blocksize)
918 {
919         btrfs_set_stack_inode_generation(dst, 1);
920         btrfs_set_stack_inode_size(dst, src->i_size);
921         btrfs_set_stack_inode_nbytes(dst, 0);
922         btrfs_set_stack_inode_block_group(dst, 0);
923         btrfs_set_stack_inode_nlink(dst, src->i_links_count);
924         btrfs_set_stack_inode_uid(dst, src->i_uid | (src->i_uid_high << 16));
925         btrfs_set_stack_inode_gid(dst, src->i_gid | (src->i_gid_high << 16));
926         btrfs_set_stack_inode_mode(dst, src->i_mode);
927         btrfs_set_stack_inode_rdev(dst, 0);
928         btrfs_set_stack_inode_flags(dst, 0);
929         btrfs_set_stack_timespec_sec(&dst->atime, src->i_atime);
930         btrfs_set_stack_timespec_nsec(&dst->atime, 0);
931         btrfs_set_stack_timespec_sec(&dst->ctime, src->i_ctime);
932         btrfs_set_stack_timespec_nsec(&dst->ctime, 0);
933         btrfs_set_stack_timespec_sec(&dst->mtime, src->i_mtime);
934         btrfs_set_stack_timespec_nsec(&dst->mtime, 0);
935         btrfs_set_stack_timespec_sec(&dst->otime, 0);
936         btrfs_set_stack_timespec_nsec(&dst->otime, 0);
937
938         if (S_ISDIR(src->i_mode)) {
939                 btrfs_set_stack_inode_size(dst, 0);
940                 btrfs_set_stack_inode_nlink(dst, 1);
941         }
942         if (S_ISREG(src->i_mode)) {
943                 btrfs_set_stack_inode_size(dst, (u64)src->i_size_high << 32 |
944                                            (u64)src->i_size);
945         }
946         if (!S_ISREG(src->i_mode) && !S_ISDIR(src->i_mode) &&
947             !S_ISLNK(src->i_mode)) {
948                 if (src->i_block[0]) {
949                         btrfs_set_stack_inode_rdev(dst,
950                                 old_decode_dev(src->i_block[0]));
951                 } else {
952                         btrfs_set_stack_inode_rdev(dst,
953                                 new_decode_dev(src->i_block[1]));
954                 }
955         }
956         return 0;
957 }
958
959 /*
960  * copy a single inode. do all the required works, such as cloning
961  * inode item, creating file extents and creating directory entries.
962  */
963 static int copy_single_inode(struct btrfs_trans_handle *trans,
964                              struct btrfs_root *root, u64 objectid,
965                              ext2_filsys ext2_fs, ext2_ino_t ext2_ino,
966                              struct ext2_inode *ext2_inode,
967                              int datacsum, int packing, int noxattr)
968 {
969         int ret;
970         struct btrfs_key inode_key;
971         struct btrfs_inode_item btrfs_inode;
972
973         if (ext2_inode->i_links_count == 0)
974                 return 0;
975
976         copy_inode_item(&btrfs_inode, ext2_inode, ext2_fs->blocksize);
977         if (!datacsum && S_ISREG(ext2_inode->i_mode)) {
978                 u32 flags = btrfs_stack_inode_flags(&btrfs_inode) |
979                             BTRFS_INODE_NODATASUM;
980                 btrfs_set_stack_inode_flags(&btrfs_inode, flags);
981         }
982
983         switch (ext2_inode->i_mode & S_IFMT) {
984         case S_IFREG:
985                 ret = create_file_extents(trans, root, objectid, &btrfs_inode,
986                                         ext2_fs, ext2_ino, datacsum, packing);
987                 break;
988         case S_IFDIR:
989                 ret = create_dir_entries(trans, root, objectid, &btrfs_inode,
990                                          ext2_fs, ext2_ino);
991                 break;
992         case S_IFLNK:
993                 ret = create_symbol_link(trans, root, objectid, &btrfs_inode,
994                                          ext2_fs, ext2_ino, ext2_inode);
995                 break;
996         default:
997                 ret = 0;
998                 break;
999         }
1000         if (ret)
1001                 return ret;
1002
1003         if (!noxattr) {
1004                 ret = copy_extended_attrs(trans, root, objectid, &btrfs_inode,
1005                                           ext2_fs, ext2_ino);
1006                 if (ret)
1007                         return ret;
1008         }
1009         inode_key.objectid = objectid;
1010         inode_key.offset = 0;
1011         btrfs_set_key_type(&inode_key, BTRFS_INODE_ITEM_KEY);
1012         ret = btrfs_insert_inode(trans, root, objectid, &btrfs_inode);
1013         return ret;
1014 }
1015
1016 static int copy_disk_extent(struct btrfs_root *root, u64 dst_bytenr,
1017                             u64 src_bytenr, u32 num_bytes)
1018 {
1019         int ret;
1020         char *buffer;
1021         struct btrfs_fs_devices *fs_devs = root->fs_info->fs_devices;
1022
1023         buffer = malloc(num_bytes);
1024         if (!buffer)
1025                 return -ENOMEM;
1026         ret = pread(fs_devs->latest_bdev, buffer, num_bytes, src_bytenr);
1027         if (ret != num_bytes)
1028                 goto fail;
1029         ret = pwrite(fs_devs->latest_bdev, buffer, num_bytes, dst_bytenr);
1030         if (ret != num_bytes)
1031                 goto fail;
1032         ret = 0;
1033 fail:
1034         free(buffer);
1035         if (ret > 0)
1036                 ret = -1;
1037         return ret;
1038 }
1039 /*
1040  * scan ext2's inode bitmap and copy all used inodes.
1041  */
1042 static int copy_inodes(struct btrfs_root *root, ext2_filsys ext2_fs,
1043                        int datacsum, int packing, int noxattr)
1044 {
1045         int ret;
1046         errcode_t err;
1047         ext2_inode_scan ext2_scan;
1048         struct ext2_inode ext2_inode;
1049         ext2_ino_t ext2_ino;
1050         u64 objectid;
1051         struct btrfs_trans_handle *trans;
1052
1053         trans = btrfs_start_transaction(root, 1);
1054         if (!trans)
1055                 return -ENOMEM;
1056         err = ext2fs_open_inode_scan(ext2_fs, 0, &ext2_scan);
1057         if (err) {
1058                 fprintf(stderr, "ext2fs_open_inode_scan: %s\n", error_message(err));
1059                 return -1;
1060         }
1061         while (!(err = ext2fs_get_next_inode(ext2_scan, &ext2_ino,
1062                                              &ext2_inode))) {
1063                 /* no more inodes */
1064                 if (ext2_ino == 0)
1065                         break;
1066                 /* skip special inode in ext2fs */
1067                 if (ext2_ino < EXT2_GOOD_OLD_FIRST_INO &&
1068                     ext2_ino != EXT2_ROOT_INO)
1069                         continue;
1070                 objectid = ext2_ino + INO_OFFSET;
1071                 ret = copy_single_inode(trans, root,
1072                                         objectid, ext2_fs, ext2_ino,
1073                                         &ext2_inode, datacsum, packing,
1074                                         noxattr);
1075                 if (ret)
1076                         return ret;
1077                 if (trans->blocks_used >= 4096) {
1078                         ret = btrfs_commit_transaction(trans, root);
1079                         BUG_ON(ret);
1080                         trans = btrfs_start_transaction(root, 1);
1081                         BUG_ON(!trans);
1082                 }
1083         }
1084         if (err) {
1085                 fprintf(stderr, "ext2fs_get_next_inode: %s\n", error_message(err));
1086                 return -1;
1087         }
1088         ret = btrfs_commit_transaction(trans, root);
1089         BUG_ON(ret);
1090
1091         return ret;
1092 }
1093
1094 /*
1095  * Construct a range of ext2fs image file.
1096  * scan block allocation bitmap, find all blocks used by the ext2fs
1097  * in this range and create file extents that point to these blocks.
1098  *
1099  * Note: Before calling the function, no file extent points to blocks
1100  *       in this range
1101  */
1102 static int create_image_file_range(struct btrfs_trans_handle *trans,
1103                                    struct btrfs_root *root, u64 objectid,
1104                                    struct btrfs_inode_item *inode,
1105                                    u64 start_byte, u64 end_byte,
1106                                    ext2_filsys ext2_fs)
1107 {
1108         u32 blocksize = ext2_fs->blocksize;
1109         u32 block = start_byte / blocksize;
1110         u32 last_block = (end_byte + blocksize - 1) / blocksize;
1111         int ret = 0;
1112         struct blk_iterate_data data = {
1113                 .trans          = trans,
1114                 .root           = root,
1115                 .inode          = inode,
1116                 .objectid       = objectid,
1117                 .first_block    = block,
1118                 .disk_block     = 0,
1119                 .num_blocks     = 0,
1120                 .boundary       = (u64)-1,
1121                 .checksum       = 0,
1122                 .errcode        = 0,
1123         };
1124         for (; start_byte < end_byte; block++, start_byte += blocksize) {
1125                 if (!ext2fs_fast_test_block_bitmap(ext2_fs->block_map, block))
1126                         continue;
1127                 ret = block_iterate_proc(NULL, block, block, &data);
1128                 if (ret & BLOCK_ABORT) {
1129                         ret = data.errcode;
1130                         goto fail;
1131                 }
1132         }
1133         if (data.num_blocks > 0) {
1134                 ret = record_file_blocks(trans, root, objectid, inode,
1135                                          data.first_block, data.disk_block,
1136                                          data.num_blocks, 0);
1137                 if (ret)
1138                         goto fail;
1139                 data.first_block += data.num_blocks;
1140         }
1141         if (last_block > data.first_block) {
1142                 ret = record_file_blocks(trans, root, objectid, inode,
1143                                          data.first_block, 0, last_block -
1144                                          data.first_block, 0);
1145                 if (ret)
1146                         goto fail;
1147         }
1148 fail:
1149         return ret;
1150 }
1151 /*
1152  * Create the ext2fs image file.
1153  */
1154 static int create_ext2_image(struct btrfs_root *root, ext2_filsys ext2_fs,
1155                              const char *name)
1156 {
1157         int ret;
1158         struct btrfs_key key;
1159         struct btrfs_key location;
1160         struct btrfs_path path;
1161         struct btrfs_inode_item btrfs_inode;
1162         struct btrfs_inode_item *inode_item;
1163         struct extent_buffer *leaf;
1164         struct btrfs_fs_info *fs_info = root->fs_info;
1165         struct btrfs_root *extent_root = fs_info->extent_root;
1166         struct btrfs_trans_handle *trans;
1167         struct btrfs_extent_item *ei;
1168         struct btrfs_extent_inline_ref *iref;
1169         struct btrfs_extent_data_ref *dref;
1170         u64 bytenr;
1171         u64 num_bytes;
1172         u64 objectid;
1173         u64 last_byte;
1174         u64 first_free;
1175         u64 total_bytes;
1176         u32 sectorsize = root->sectorsize;
1177
1178         total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
1179         first_free =  BTRFS_SUPER_INFO_OFFSET + sectorsize * 2 - 1;
1180         first_free &= ~((u64)sectorsize - 1);
1181
1182         memset(&btrfs_inode, 0, sizeof(btrfs_inode));
1183         btrfs_set_stack_inode_generation(&btrfs_inode, 1);
1184         btrfs_set_stack_inode_size(&btrfs_inode, total_bytes);
1185         btrfs_set_stack_inode_nlink(&btrfs_inode, 1);
1186         btrfs_set_stack_inode_nbytes(&btrfs_inode, 0);
1187         btrfs_set_stack_inode_mode(&btrfs_inode, S_IFREG | 0400);
1188         btrfs_set_stack_inode_flags(&btrfs_inode, BTRFS_INODE_NODATASUM |
1189                                     BTRFS_INODE_READONLY);
1190         btrfs_init_path(&path);
1191         trans = btrfs_start_transaction(root, 1);
1192         BUG_ON(!trans);
1193
1194         objectid = btrfs_root_dirid(&root->root_item);
1195         ret = btrfs_find_free_objectid(trans, root, objectid, &objectid);
1196         if (ret)
1197                 goto fail;
1198
1199         /*
1200          * copy blocks covered by extent #0 to new positions. extent #0 is
1201          * special, we can't rely on relocate_extents_range to relocate it.
1202          */
1203         for (last_byte = 0; last_byte < first_free; last_byte += sectorsize) {
1204                 ret = custom_alloc_extent(root, sectorsize, 0, &key);
1205                 if (ret)
1206                         goto fail;
1207                 ret = copy_disk_extent(root, key.objectid, last_byte,
1208                                        sectorsize);
1209                 if (ret)
1210                         goto fail;
1211                 ret = btrfs_record_file_extent(trans, root, objectid,
1212                                                &btrfs_inode, last_byte,
1213                                                key.objectid, sectorsize);
1214                 if (ret)
1215                         goto fail;
1216         }
1217
1218         while(1) {
1219                 key.objectid = last_byte;
1220                 key.offset = 0;
1221                 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1222                 ret = btrfs_search_slot(trans, fs_info->extent_root,
1223                                         &key, &path, 0, 0);
1224                 if (ret < 0)
1225                         goto fail;
1226 next:
1227                 leaf = path.nodes[0];
1228                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1229                         ret = btrfs_next_leaf(extent_root, &path);
1230                         if (ret < 0)
1231                                 goto fail;
1232                         if (ret > 0)
1233                                 break;
1234                         leaf = path.nodes[0];
1235                 }
1236                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1237                 if (last_byte > key.objectid ||
1238                     key.type != BTRFS_EXTENT_ITEM_KEY) {
1239                         path.slots[0]++;
1240                         goto next;
1241                 }
1242
1243                 bytenr = key.objectid;
1244                 num_bytes = key.offset;
1245                 ei = btrfs_item_ptr(leaf, path.slots[0],
1246                                     struct btrfs_extent_item);
1247                 if (!(btrfs_extent_flags(leaf, ei) & BTRFS_EXTENT_FLAG_DATA)) {
1248                         path.slots[0]++;
1249                         goto next;
1250                 }
1251
1252                 BUG_ON(btrfs_item_size_nr(leaf, path.slots[0]) != sizeof(*ei) +
1253                        btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY));
1254
1255                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
1256                 key.type = btrfs_extent_inline_ref_type(leaf, iref);
1257                 BUG_ON(key.type != BTRFS_EXTENT_DATA_REF_KEY);
1258                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1259                 if (btrfs_extent_data_ref_root(leaf, dref) !=
1260                     BTRFS_FS_TREE_OBJECTID) {
1261                         path.slots[0]++;
1262                         goto next;
1263                 }
1264
1265                 if (bytenr > last_byte) {
1266                         ret = create_image_file_range(trans, root, objectid,
1267                                                       &btrfs_inode, last_byte,
1268                                                       bytenr, ext2_fs);
1269                         if (ret)
1270                                 goto fail;
1271                 }
1272                 ret = btrfs_record_file_extent(trans, root, objectid,
1273                                                &btrfs_inode, bytenr, bytenr,
1274                                                num_bytes);
1275                 if (ret)
1276                         goto fail;
1277                 last_byte = bytenr + num_bytes;
1278                 btrfs_release_path(&path);
1279
1280                 if (trans->blocks_used >= 4096) {
1281                         ret = btrfs_commit_transaction(trans, root);
1282                         BUG_ON(ret);
1283                         trans = btrfs_start_transaction(root, 1);
1284                         BUG_ON(!trans);
1285                 }
1286         }
1287         btrfs_release_path(&path);
1288         if (total_bytes > last_byte) {
1289                 ret = create_image_file_range(trans, root, objectid,
1290                                               &btrfs_inode, last_byte,
1291                                               total_bytes, ext2_fs);
1292                 if (ret)
1293                         goto fail;
1294         }
1295
1296         ret = btrfs_insert_inode(trans, root, objectid, &btrfs_inode);
1297         if (ret)
1298                 goto fail;
1299
1300         location.objectid = objectid;
1301         location.offset = 0;
1302         btrfs_set_key_type(&location, BTRFS_INODE_ITEM_KEY);
1303         ret = btrfs_insert_dir_item(trans, root, name, strlen(name),
1304                                     btrfs_root_dirid(&root->root_item),
1305                                     &location, EXT2_FT_REG_FILE, objectid);
1306         if (ret)
1307                 goto fail;
1308         ret = btrfs_insert_inode_ref(trans, root, name, strlen(name),
1309                                      objectid,
1310                                      btrfs_root_dirid(&root->root_item),
1311                                      objectid);
1312         if (ret)
1313                 goto fail;
1314         location.objectid = btrfs_root_dirid(&root->root_item);
1315         location.offset = 0;
1316         btrfs_set_key_type(&location, BTRFS_INODE_ITEM_KEY);
1317         ret = btrfs_lookup_inode(trans, root, &path, &location, 1);
1318         if (ret)
1319                 goto fail;
1320         leaf = path.nodes[0];
1321         inode_item = btrfs_item_ptr(leaf, path.slots[0],
1322                                     struct btrfs_inode_item);
1323         btrfs_set_inode_size(leaf, inode_item, strlen(name) * 2 +
1324                              btrfs_inode_size(leaf, inode_item));
1325         btrfs_mark_buffer_dirty(leaf);
1326         btrfs_release_path(&path);
1327         ret = btrfs_commit_transaction(trans, root);
1328         BUG_ON(ret);
1329 fail:
1330         btrfs_release_path(&path);
1331         return ret;
1332 }
1333
1334 static struct btrfs_root * link_subvol(struct btrfs_root *root,
1335                 const char *base, u64 root_objectid)
1336 {
1337         struct btrfs_trans_handle *trans;
1338         struct btrfs_fs_info *fs_info = root->fs_info;
1339         struct btrfs_root *tree_root = fs_info->tree_root;
1340         struct btrfs_root *new_root = NULL;
1341         struct btrfs_path *path;
1342         struct btrfs_inode_item *inode_item;
1343         struct extent_buffer *leaf;
1344         struct btrfs_key key;
1345         u64 dirid = btrfs_root_dirid(&root->root_item);
1346         u64 index = 2;
1347         char buf[BTRFS_NAME_LEN + 1]; /* for snprintf null */
1348         int len;
1349         int i;
1350         int ret;
1351
1352         len = strlen(base);
1353         if (len < 1 || len > BTRFS_NAME_LEN)
1354                 return NULL;
1355
1356         path = btrfs_alloc_path();
1357         BUG_ON(!path);
1358
1359         key.objectid = dirid;
1360         key.type = BTRFS_DIR_INDEX_KEY;
1361         key.offset = (u64)-1;
1362
1363         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1364         BUG_ON(ret <= 0);
1365
1366         if (path->slots[0] > 0) {
1367                 path->slots[0]--;
1368                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1369                 if (key.objectid == dirid && key.type == BTRFS_DIR_INDEX_KEY)
1370                         index = key.offset + 1;
1371         }
1372         btrfs_release_path(path);
1373
1374         trans = btrfs_start_transaction(root, 1);
1375         BUG_ON(!trans);
1376
1377         key.objectid = dirid;
1378         key.offset = 0;
1379         key.type =  BTRFS_INODE_ITEM_KEY;
1380
1381         ret = btrfs_lookup_inode(trans, root, path, &key, 1);
1382         BUG_ON(ret);
1383         leaf = path->nodes[0];
1384         inode_item = btrfs_item_ptr(leaf, path->slots[0],
1385                                     struct btrfs_inode_item);
1386
1387         key.objectid = root_objectid;
1388         key.offset = (u64)-1;
1389         key.type = BTRFS_ROOT_ITEM_KEY;
1390
1391         memcpy(buf, base, len);
1392         for (i = 0; i < 1024; i++) {
1393                 ret = btrfs_insert_dir_item(trans, root, buf, len,
1394                                             dirid, &key, BTRFS_FT_DIR, index);
1395                 if (ret != -EEXIST)
1396                         break;
1397                 len = snprintf(buf, ARRAY_SIZE(buf), "%s%d", base, i);
1398                 if (len < 1 || len > BTRFS_NAME_LEN) {
1399                         ret = -EINVAL;
1400                         break;
1401                 }
1402         }
1403         if (ret)
1404                 goto fail;
1405
1406         btrfs_set_inode_size(leaf, inode_item, len * 2 +
1407                              btrfs_inode_size(leaf, inode_item));
1408         btrfs_mark_buffer_dirty(leaf);
1409         btrfs_release_path(path);
1410
1411         /* add the backref first */
1412         ret = btrfs_add_root_ref(trans, tree_root, root_objectid,
1413                                  BTRFS_ROOT_BACKREF_KEY,
1414                                  root->root_key.objectid,
1415                                  dirid, index, buf, len);
1416         BUG_ON(ret);
1417
1418         /* now add the forward ref */
1419         ret = btrfs_add_root_ref(trans, tree_root, root->root_key.objectid,
1420                                  BTRFS_ROOT_REF_KEY, root_objectid,
1421                                  dirid, index, buf, len);
1422
1423         ret = btrfs_commit_transaction(trans, root);
1424         BUG_ON(ret);
1425
1426         new_root = btrfs_read_fs_root(fs_info, &key);
1427         if (IS_ERR(new_root))
1428                 new_root = NULL;
1429 fail:
1430         btrfs_free_path(path);
1431         return new_root;
1432 }
1433
1434 static int create_chunk_mapping(struct btrfs_trans_handle *trans,
1435                                 struct btrfs_root *root)
1436 {
1437         struct btrfs_fs_info *info = root->fs_info;
1438         struct btrfs_root *chunk_root = info->chunk_root;
1439         struct btrfs_root *extent_root = info->extent_root;
1440         struct btrfs_device *device;
1441         struct btrfs_block_group_cache *cache;
1442         struct btrfs_dev_extent *extent;
1443         struct extent_buffer *leaf;
1444         struct btrfs_chunk chunk;
1445         struct btrfs_key key;
1446         struct btrfs_path path;
1447         u64 cur_start;
1448         u64 total_bytes;
1449         u64 chunk_objectid;
1450         int ret;
1451
1452         btrfs_init_path(&path);
1453
1454         total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
1455         chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
1456
1457         BUG_ON(list_empty(&info->fs_devices->devices));
1458         device = list_entry(info->fs_devices->devices.next,
1459                             struct btrfs_device, dev_list);
1460         BUG_ON(device->devid != info->fs_devices->latest_devid);
1461
1462         /* delete device extent created by make_btrfs */
1463         key.objectid = device->devid;
1464         key.offset = 0;
1465         key.type = BTRFS_DEV_EXTENT_KEY;
1466         ret = btrfs_search_slot(trans, device->dev_root, &key, &path, -1, 1);
1467         if (ret < 0)
1468                 goto err;
1469
1470         BUG_ON(ret > 0);
1471         ret = btrfs_del_item(trans, device->dev_root, &path);
1472         if (ret)
1473                 goto err;
1474         btrfs_release_path(&path);
1475
1476         /* delete chunk item created by make_btrfs */
1477         key.objectid = chunk_objectid;
1478         key.offset = 0;
1479         key.type = BTRFS_CHUNK_ITEM_KEY;
1480         ret = btrfs_search_slot(trans, chunk_root, &key, &path, -1, 1);
1481         if (ret < 0)
1482                 goto err;
1483
1484         BUG_ON(ret > 0);
1485         ret = btrfs_del_item(trans, chunk_root, &path);
1486         if (ret)
1487                 goto err;
1488         btrfs_release_path(&path);
1489
1490         /* for each block group, create device extent and chunk item */
1491         cur_start = 0;
1492         while (cur_start < total_bytes) {
1493                 cache = btrfs_lookup_block_group(root->fs_info, cur_start);
1494                 BUG_ON(!cache);
1495
1496                 /* insert device extent */
1497                 key.objectid = device->devid;
1498                 key.offset = cache->key.objectid;
1499                 key.type = BTRFS_DEV_EXTENT_KEY;
1500                 ret = btrfs_insert_empty_item(trans, device->dev_root, &path,
1501                                               &key, sizeof(*extent));
1502                 if (ret)
1503                         goto err;
1504
1505                 leaf = path.nodes[0];
1506                 extent = btrfs_item_ptr(leaf, path.slots[0],
1507                                         struct btrfs_dev_extent);
1508
1509                 btrfs_set_dev_extent_chunk_tree(leaf, extent,
1510                                                 chunk_root->root_key.objectid);
1511                 btrfs_set_dev_extent_chunk_objectid(leaf, extent,
1512                                                     chunk_objectid);
1513                 btrfs_set_dev_extent_chunk_offset(leaf, extent,
1514                                                   cache->key.objectid);
1515                 btrfs_set_dev_extent_length(leaf, extent, cache->key.offset);
1516                 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
1517                     (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
1518                     BTRFS_UUID_SIZE);
1519                 btrfs_mark_buffer_dirty(leaf);
1520                 btrfs_release_path(&path);
1521
1522                 /* insert chunk item */
1523                 btrfs_set_stack_chunk_length(&chunk, cache->key.offset);
1524                 btrfs_set_stack_chunk_owner(&chunk,
1525                                             extent_root->root_key.objectid);
1526                 btrfs_set_stack_chunk_stripe_len(&chunk, STRIPE_LEN);
1527                 btrfs_set_stack_chunk_type(&chunk, cache->flags);
1528                 btrfs_set_stack_chunk_io_align(&chunk, device->io_align);
1529                 btrfs_set_stack_chunk_io_width(&chunk, device->io_width);
1530                 btrfs_set_stack_chunk_sector_size(&chunk, device->sector_size);
1531                 btrfs_set_stack_chunk_num_stripes(&chunk, 1);
1532                 btrfs_set_stack_chunk_sub_stripes(&chunk, 0);
1533                 btrfs_set_stack_stripe_devid(&chunk.stripe, device->devid);
1534                 btrfs_set_stack_stripe_offset(&chunk.stripe,
1535                                               cache->key.objectid);
1536                 memcpy(&chunk.stripe.dev_uuid, device->uuid, BTRFS_UUID_SIZE);
1537
1538                 key.objectid = chunk_objectid;
1539                 key.offset = cache->key.objectid;
1540                 key.type = BTRFS_CHUNK_ITEM_KEY;
1541
1542                 ret = btrfs_insert_item(trans, chunk_root, &key, &chunk,
1543                                         btrfs_chunk_item_size(1));
1544                 if (ret)
1545                         goto err;
1546
1547                 cur_start = cache->key.objectid + cache->key.offset;
1548         }
1549
1550         device->bytes_used = total_bytes;
1551         ret = btrfs_update_device(trans, device);
1552 err:
1553         btrfs_release_path(&path);
1554         return ret;
1555 }
1556
1557 static int create_subvol(struct btrfs_trans_handle *trans,
1558                          struct btrfs_root *root, u64 root_objectid)
1559 {
1560         struct extent_buffer *tmp;
1561         struct btrfs_root *new_root;
1562         struct btrfs_key key;
1563         struct btrfs_root_item root_item;
1564         int ret;
1565
1566         ret = btrfs_copy_root(trans, root, root->node, &tmp,
1567                               root_objectid);
1568         BUG_ON(ret);
1569
1570         memcpy(&root_item, &root->root_item, sizeof(root_item));
1571         btrfs_set_root_bytenr(&root_item, tmp->start);
1572         btrfs_set_root_level(&root_item, btrfs_header_level(tmp));
1573         btrfs_set_root_generation(&root_item, trans->transid);
1574         free_extent_buffer(tmp);
1575
1576         key.objectid = root_objectid;
1577         key.type = BTRFS_ROOT_ITEM_KEY;
1578         key.offset = trans->transid;
1579         ret = btrfs_insert_root(trans, root->fs_info->tree_root,
1580                                 &key, &root_item);
1581
1582         key.offset = (u64)-1;
1583         new_root = btrfs_read_fs_root(root->fs_info, &key);
1584         BUG_ON(!new_root || IS_ERR(new_root));
1585
1586         ret = btrfs_make_root_dir(trans, new_root, BTRFS_FIRST_FREE_OBJECTID);
1587         BUG_ON(ret);
1588
1589         return 0;
1590 }
1591
1592 static int init_btrfs(struct btrfs_root *root)
1593 {
1594         int ret;
1595         struct btrfs_key location;
1596         struct btrfs_trans_handle *trans;
1597         struct btrfs_fs_info *fs_info = root->fs_info;
1598         struct extent_buffer *tmp;
1599
1600         trans = btrfs_start_transaction(root, 1);
1601         BUG_ON(!trans);
1602         ret = btrfs_make_block_groups(trans, root);
1603         if (ret)
1604                 goto err;
1605         ret = btrfs_fix_block_accounting(trans, root);
1606         if (ret)
1607                 goto err;
1608         ret = create_chunk_mapping(trans, root);
1609         if (ret)
1610                 goto err;
1611         ret = btrfs_make_root_dir(trans, fs_info->tree_root,
1612                                   BTRFS_ROOT_TREE_DIR_OBJECTID);
1613         if (ret)
1614                 goto err;
1615         memcpy(&location, &root->root_key, sizeof(location));
1616         location.offset = (u64)-1;
1617         ret = btrfs_insert_dir_item(trans, fs_info->tree_root, "default", 7,
1618                                 btrfs_super_root_dir(fs_info->super_copy),
1619                                 &location, BTRFS_FT_DIR, 0);
1620         if (ret)
1621                 goto err;
1622         ret = btrfs_insert_inode_ref(trans, fs_info->tree_root, "default", 7,
1623                                 location.objectid,
1624                                 btrfs_super_root_dir(fs_info->super_copy), 0);
1625         if (ret)
1626                 goto err;
1627         btrfs_set_root_dirid(&fs_info->fs_root->root_item,
1628                              BTRFS_FIRST_FREE_OBJECTID);
1629
1630         /* subvol for ext2 image file */
1631         ret = create_subvol(trans, root, EXT2_IMAGE_SUBVOL_OBJECTID);
1632         BUG_ON(ret);
1633         /* subvol for data relocation */
1634         ret = create_subvol(trans, root, BTRFS_DATA_RELOC_TREE_OBJECTID);
1635         BUG_ON(ret);
1636
1637         extent_buffer_get(fs_info->csum_root->node);
1638         ret = __btrfs_cow_block(trans, fs_info->csum_root,
1639                                 fs_info->csum_root->node, NULL, 0, &tmp, 0, 0);
1640         BUG_ON(ret);
1641         free_extent_buffer(tmp);
1642
1643         ret = btrfs_commit_transaction(trans, root);
1644         BUG_ON(ret);
1645 err:
1646         return ret;
1647 }
1648
1649 /*
1650  * Migrate super block to it's default position and zero 0 ~ 16k
1651  */
1652 static int migrate_super_block(int fd, u64 old_bytenr, u32 sectorsize)
1653 {
1654         int ret;
1655         struct extent_buffer *buf;
1656         struct btrfs_super_block *super;
1657         u32 len;
1658         u32 bytenr;
1659
1660         BUG_ON(sectorsize < sizeof(*super));
1661         buf = malloc(sizeof(*buf) + sectorsize);
1662         if (!buf)
1663                 return -ENOMEM;
1664
1665         buf->len = sectorsize;
1666         ret = pread(fd, buf->data, sectorsize, old_bytenr);
1667         if (ret != sectorsize)
1668                 goto fail;
1669
1670         super = (struct btrfs_super_block *)buf->data;
1671         BUG_ON(btrfs_super_bytenr(super) != old_bytenr);
1672         btrfs_set_super_bytenr(super, BTRFS_SUPER_INFO_OFFSET);
1673
1674         csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
1675         ret = pwrite(fd, buf->data, sectorsize, BTRFS_SUPER_INFO_OFFSET);
1676         if (ret != sectorsize)
1677                 goto fail;
1678
1679         ret = fsync(fd);
1680         if (ret)
1681                 goto fail;
1682
1683         memset(buf->data, 0, sectorsize);
1684         for (bytenr = 0; bytenr < BTRFS_SUPER_INFO_OFFSET; ) {
1685                 len = BTRFS_SUPER_INFO_OFFSET - bytenr;
1686                 if (len > sectorsize)
1687                         len = sectorsize;
1688                 ret = pwrite(fd, buf->data, len, bytenr);
1689                 if (ret != len) {
1690                         fprintf(stderr, "unable to zero fill device\n");
1691                         break;
1692                 }
1693                 bytenr += len;
1694         }
1695         ret = 0;
1696         fsync(fd);
1697 fail:
1698         free(buf);
1699         if (ret > 0)
1700                 ret = -1;
1701         return ret;
1702 }
1703
1704 static int prepare_system_chunk_sb(struct btrfs_super_block *super)
1705 {
1706         struct btrfs_chunk *chunk;
1707         struct btrfs_disk_key *key;
1708         u32 sectorsize = btrfs_super_sectorsize(super);
1709
1710         key = (struct btrfs_disk_key *)(super->sys_chunk_array);
1711         chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
1712                                        sizeof(struct btrfs_disk_key));
1713
1714         btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
1715         btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
1716         btrfs_set_disk_key_offset(key, 0);
1717
1718         btrfs_set_stack_chunk_length(chunk, btrfs_super_total_bytes(super));
1719         btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
1720         btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024);
1721         btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
1722         btrfs_set_stack_chunk_io_align(chunk, sectorsize);
1723         btrfs_set_stack_chunk_io_width(chunk, sectorsize);
1724         btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
1725         btrfs_set_stack_chunk_num_stripes(chunk, 1);
1726         btrfs_set_stack_chunk_sub_stripes(chunk, 0);
1727         chunk->stripe.devid = super->dev_item.devid;
1728         btrfs_set_stack_stripe_offset(&chunk->stripe, 0);
1729         memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
1730         btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
1731         return 0;
1732 }
1733
1734 static int prepare_system_chunk(int fd, u64 sb_bytenr, u32 sectorsize)
1735 {
1736         int ret;
1737         struct extent_buffer *buf;
1738         struct btrfs_super_block *super;
1739
1740         BUG_ON(sectorsize < sizeof(*super));
1741         buf = malloc(sizeof(*buf) + sectorsize);
1742         if (!buf)
1743                 return -ENOMEM;
1744
1745         buf->len = sectorsize;
1746         ret = pread(fd, buf->data, sectorsize, sb_bytenr);
1747         if (ret != sectorsize)
1748                 goto fail;
1749
1750         super = (struct btrfs_super_block *)buf->data;
1751         BUG_ON(btrfs_super_bytenr(super) != sb_bytenr);
1752         BUG_ON(btrfs_super_num_devices(super) != 1);
1753
1754         ret = prepare_system_chunk_sb(super);
1755         if (ret)
1756                 goto fail;
1757
1758         csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
1759         ret = pwrite(fd, buf->data, sectorsize, sb_bytenr);
1760         if (ret != sectorsize)
1761                 goto fail;
1762
1763         ret = 0;
1764 fail:
1765         free(buf);
1766         if (ret > 0)
1767                 ret = -1;
1768         return ret;
1769 }
1770
/*
 * Relocate the data behind one file extent item.
 *
 * @extent_key names a file extent item in @root that is expected to
 * describe the whole disk extent (@extent_start, @extent_size).  The item
 * is deleted and the disk extent freed.  The data is then processed one
 * sector at a time: a sector already recorded in @reloc_tree reuses the new
 * position stored there, any other sector is copied to a newly allocated
 * one and recorded in @reloc_tree.  The new blocks are handed to
 * block_iterate_proc() / record_file_blocks() and the inode item is written
 * back at the end.
 *
 * Returns 0 on success and 1 if the file extent item does not match the
 * given disk extent.  Non-zero results of the helpers are passed through,
 * as is data.errcode when block_iterate_proc() aborts.
 */
static int relocate_one_reference(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root,
                                  u64 extent_start, u64 extent_size,
                                  struct btrfs_key *extent_key,
                                  struct extent_io_tree *reloc_tree)
{
        struct extent_buffer *leaf;
        struct btrfs_file_extent_item *fi;
        struct btrfs_key key;
        struct btrfs_path path;
        struct btrfs_inode_item inode;
        struct blk_iterate_data data;
        u64 bytenr;
        u64 num_bytes;
        u64 cur_offset;
        u64 new_pos;
        u64 nbytes;
        u64 sector_end;
        u32 sectorsize = root->sectorsize;
        unsigned long ptr;
        int datacsum;
        int fd;
        int ret;

        btrfs_init_path(&path);
        /* look the item up for deletion (ins_len -1, cow 1) */
        ret = btrfs_search_slot(trans, root, extent_key, &path, -1, 1);
        if (ret)
                goto fail;

        leaf = path.nodes[0];
        fi = btrfs_item_ptr(leaf, path.slots[0],
                            struct btrfs_file_extent_item);
        /* only items that start at the beginning of the extent are handled */
        BUG_ON(btrfs_file_extent_offset(leaf, fi) > 0);
        if (extent_start != btrfs_file_extent_disk_bytenr(leaf, fi) ||
            extent_size != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
                /* the item points somewhere else: nothing to relocate */
                ret = 1;
                goto fail;
        }

        bytenr = extent_start + btrfs_file_extent_offset(leaf, fi);
        num_bytes = btrfs_file_extent_num_bytes(leaf, fi);

        ret = btrfs_del_item(trans, root, &path);
        if (ret)
                goto fail;

        ret = btrfs_free_extent(trans, root, extent_start, extent_size, 0,
                                root->root_key.objectid,
                                extent_key->objectid, extent_key->offset);
        if (ret)
                goto fail;

        btrfs_release_path(&path);

        /* take a stack copy of the owning inode item */
        key.objectid = extent_key->objectid;
        key.offset = 0;
        key.type =  BTRFS_INODE_ITEM_KEY;
        ret = btrfs_lookup_inode(trans, root, &path, &key, 0);
        if (ret)
                goto fail;

        leaf = path.nodes[0];
        ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
        read_extent_buffer(leaf, &inode, ptr, sizeof(inode));
        btrfs_release_path(&path);

        BUG_ON(num_bytes & (sectorsize - 1));
        /*
         * Drop the bytes of the deleted item from the inode's byte count.
         * NOTE(review): presumably record_file_blocks() adds them back for
         * the relocated blocks - confirm against that helper.
         */
        nbytes = btrfs_stack_inode_nbytes(&inode) - num_bytes;
        btrfs_set_stack_inode_nbytes(&inode, nbytes);
        datacsum = !(btrfs_stack_inode_flags(&inode) & BTRFS_INODE_NODATASUM);

        data = (struct blk_iterate_data) {
                .trans          = trans,
                .root           = root,
                .inode          = &inode,
                .objectid       = extent_key->objectid,
                .first_block    = extent_key->offset / sectorsize,
                .disk_block     = 0,
                .num_blocks     = 0,
                .boundary       = (u64)-1,
                .checksum       = datacsum,
                .errcode        = 0,
        };

        cur_offset = extent_key->offset;
        /* walk the extent one sector at a time */
        while (num_bytes > 0) {
                sector_end = bytenr + sectorsize - 1;
                if (test_range_bit(reloc_tree, bytenr, sector_end,
                                   EXTENT_LOCKED, 1)) {
                        /* already relocated: reuse the recorded position */
                        ret = get_state_private(reloc_tree, bytenr, &new_pos);
                        BUG_ON(ret);
                } else {
                        ret = custom_alloc_extent(root, sectorsize, 0, &key);
                        if (ret)
                                goto fail;
                        new_pos = key.objectid;

                        /* on the first sector, ask for readahead of the rest */
                        if (cur_offset == extent_key->offset) {
                                fd = root->fs_info->fs_devices->latest_bdev;
                                readahead(fd, bytenr, num_bytes);
                        }
                        ret = copy_disk_extent(root, new_pos, bytenr,
                                               sectorsize);
                        if (ret)
                                goto fail;
                        /* remember old sector -> new position */
                        ret = set_extent_bits(reloc_tree, bytenr, sector_end,
                                              EXTENT_LOCKED, GFP_NOFS);
                        BUG_ON(ret);
                        ret = set_state_private(reloc_tree, bytenr, new_pos);
                        BUG_ON(ret);
                }

                /* hand the (disk block, file block) pair to the iterator */
                ret = block_iterate_proc(NULL, new_pos / sectorsize,
                                         cur_offset / sectorsize, &data);
                if (ret & BLOCK_ABORT) {
                        ret = data.errcode;
                        goto fail;
                }

                cur_offset += sectorsize;
                bytenr += sectorsize;
                num_bytes -= sectorsize;
        }

        /* record whatever blocks the iterator still has pending */
        if (data.num_blocks > 0) {
                ret = record_file_blocks(trans, root,
                                         extent_key->objectid, &inode,
                                         data.first_block, data.disk_block,
                                         data.num_blocks, datacsum);
                if (ret)
                        goto fail;
        }

        /* write the updated stack inode back into its item */
        key.objectid = extent_key->objectid;
        key.offset = 0;
        key.type =  BTRFS_INODE_ITEM_KEY;
        ret = btrfs_lookup_inode(trans, root, &path, &key, 1);
        if (ret)
                goto fail;

        leaf = path.nodes[0];
        ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
        write_extent_buffer(leaf, &inode, ptr, sizeof(inode));
        btrfs_mark_buffer_dirty(leaf);
        btrfs_release_path(&path);

fail:
        btrfs_release_path(&path);
        return ret;
}
1921
/*
 * Relocate the data extents found in the byte range
 * [@start_byte, @end_byte) of the extent tree.
 *
 * The range is scanned in passes that alternate between @ext2_root (even
 * passes) and @fs_root (odd passes).  In each pass, every extent that has an
 * inline data reference owned by the current root is handed to
 * relocate_one_reference().  Passes repeat as long as a pass still saw
 * extent items in the range, bounded by the pass counter.
 *
 * Returns 0 when a pass finds no extent item left in the range, -1 when
 * extents remain after the last pass, and a negative value from a failing
 * helper otherwise.
 */
static int relocate_extents_range(struct btrfs_root *fs_root,
                                  struct btrfs_root *ext2_root,
                                  u64 start_byte, u64 end_byte)
{
        struct btrfs_fs_info *info = fs_root->fs_info;
        struct btrfs_root *extent_root = info->extent_root;
        struct btrfs_root *cur_root = NULL;
        struct btrfs_trans_handle *trans;
        struct btrfs_extent_data_ref *dref;
        struct btrfs_extent_inline_ref *iref;
        struct btrfs_extent_item *ei;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        struct btrfs_key extent_key;
        struct btrfs_path path;
        struct extent_io_tree reloc_tree;
        unsigned long ptr;
        unsigned long end;
        u64 cur_byte;
        u64 num_bytes;
        u64 ref_root;
        u64 num_extents;
        int pass = 0;
        int ret;

        btrfs_init_path(&path);
        extent_io_tree_init(&reloc_tree);

        /*
         * If an extent item that begins before start_byte reaches into the
         * range, pull start_byte back to the beginning of that extent.
         */
        key.objectid = start_byte;
        key.offset = 0;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
        if (ret < 0)
                goto fail;
        if (ret > 0) {
                ret = btrfs_previous_item(extent_root, &path, 0,
                                          BTRFS_EXTENT_ITEM_KEY);
                if (ret < 0)
                        goto fail;
                if (ret == 0) {
                        leaf = path.nodes[0];
                        btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
                        if (key.objectid + key.offset > start_byte)
                                start_byte = key.objectid;
                }
        }
        btrfs_release_path(&path);
again:
        /* even passes work on ext2_root, odd passes on fs_root */
        cur_root = (pass % 2 == 0) ? ext2_root : fs_root;
        num_extents = 0;

        trans = btrfs_start_transaction(cur_root, 1);
        BUG_ON(!trans);

        cur_byte = start_byte;
        while (1) {
                key.objectid = cur_byte;
                key.offset = 0;
                key.type = BTRFS_EXTENT_ITEM_KEY;
                ret = btrfs_search_slot(trans, extent_root,
                                        &key, &path, 0, 0);
                if (ret < 0)
                        goto fail;
next:
                leaf = path.nodes[0];
                if (path.slots[0] >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(extent_root, &path);
                        if (ret < 0)
                                goto fail;
                        if (ret > 0)
                                break;
                        leaf = path.nodes[0];
                }

                /* skip items before cur_byte and anything but extent items */
                btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
                if (key.objectid < cur_byte ||
                    key.type != BTRFS_EXTENT_ITEM_KEY) {
                        path.slots[0]++;
                        goto next;
                }
                if (key.objectid >= end_byte)
                        break;

                /* counted whether or not cur_root holds a reference to it */
                num_extents++;

                cur_byte = key.objectid;
                num_bytes = key.offset;
                ei = btrfs_item_ptr(leaf, path.slots[0],
                                    struct btrfs_extent_item);
                /* only data extents are expected inside the range */
                BUG_ON(!(btrfs_extent_flags(leaf, ei) &
                         BTRFS_EXTENT_FLAG_DATA));

                ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
                end = ptr + btrfs_item_size_nr(leaf, path.slots[0]);

                /* the inline references follow the extent item header */
                ptr += sizeof(struct btrfs_extent_item);

                /* look for an inline data ref that belongs to cur_root */
                while (ptr < end) {
                        iref = (struct btrfs_extent_inline_ref *)ptr;
                        key.type = btrfs_extent_inline_ref_type(leaf, iref);
                        BUG_ON(key.type != BTRFS_EXTENT_DATA_REF_KEY);
                        dref = (struct btrfs_extent_data_ref *)(&iref->offset);
                        ref_root = btrfs_extent_data_ref_root(leaf, dref);
                        extent_key.objectid =
                                btrfs_extent_data_ref_objectid(leaf, dref);
                        extent_key.offset =
                                btrfs_extent_data_ref_offset(leaf, dref);
                        extent_key.type = BTRFS_EXTENT_DATA_KEY;
                        BUG_ON(btrfs_extent_data_ref_count(leaf, dref) != 1);

                        if (ref_root == cur_root->root_key.objectid)
                                break;

                        ptr += btrfs_extent_inline_ref_size(key.type);
                }

                /* no reference from cur_root: leave it for the other pass */
                if (ptr >= end) {
                        path.slots[0]++;
                        goto next;
                }

                /* a positive return (nothing relocated) is not an error */
                ret = relocate_one_reference(trans, cur_root, cur_byte,
                                             num_bytes, &extent_key,
                                             &reloc_tree);
                if (ret < 0)
                        goto fail;

                cur_byte += num_bytes;
                btrfs_release_path(&path);

                /* commit once the transaction has used 4096 or more blocks */
                if (trans->blocks_used >= 4096) {
                        ret = btrfs_commit_transaction(trans, cur_root);
                        BUG_ON(ret);
                        trans = btrfs_start_transaction(cur_root, 1);
                        BUG_ON(!trans);
                }
        }
        btrfs_release_path(&path);

        ret = btrfs_commit_transaction(trans, cur_root);
        BUG_ON(ret);

        /* extents were still present: run another pass on the other root */
        if (num_extents > 0 && pass++ < 16)
                goto again;

        ret = (num_extents > 0) ? -1 : 0;
fail:
        btrfs_release_path(&path);
        extent_io_tree_cleanup(&reloc_tree);
        return ret;
}
2073
2074 /*
2075  * relocate data in system chunk
2076  */
2077 static int cleanup_sys_chunk(struct btrfs_root *fs_root,
2078                              struct btrfs_root *ext2_root)
2079 {
2080         struct btrfs_block_group_cache *cache;
2081         int i, ret = 0;
2082         u64 offset = 0;
2083         u64 end_byte;
2084
2085         while(1) {
2086                 cache = btrfs_lookup_block_group(fs_root->fs_info, offset);
2087                 if (!cache)
2088                         break;
2089
2090                 end_byte = cache->key.objectid + cache->key.offset;
2091                 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
2092                         ret = relocate_extents_range(fs_root, ext2_root,
2093                                                      cache->key.objectid,
2094                                                      end_byte);
2095                         if (ret)
2096                                 goto fail;
2097                 }
2098                 offset = end_byte;
2099         }
2100         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2101                 offset = btrfs_sb_offset(i);
2102                 offset &= ~((u64)STRIPE_LEN - 1);
2103
2104                 ret = relocate_extents_range(fs_root, ext2_root,
2105                                              offset, offset + STRIPE_LEN);
2106                 if (ret)
2107                         goto fail;
2108         }
2109         ret = 0;
2110 fail:
2111         return ret;
2112 }
2113
2114 static int fixup_chunk_mapping(struct btrfs_root *root)
2115 {
2116         struct btrfs_trans_handle *trans;
2117         struct btrfs_fs_info *info = root->fs_info;
2118         struct btrfs_root *chunk_root = info->chunk_root;
2119         struct extent_buffer *leaf;
2120         struct btrfs_key key;
2121         struct btrfs_path path;
2122         struct btrfs_chunk chunk;
2123         unsigned long ptr;
2124         u32 size;
2125         u64 type;
2126         int ret;
2127
2128         btrfs_init_path(&path);
2129
2130         trans = btrfs_start_transaction(root, 1);
2131         BUG_ON(!trans);
2132
2133         /*
2134          * recow the whole chunk tree. this will move all chunk tree blocks
2135          * into system block group.
2136          */
2137         memset(&key, 0, sizeof(key));
2138         while (1) {
2139                 ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 1);
2140                 if (ret < 0)
2141                         goto err;
2142
2143                 ret = btrfs_next_leaf(chunk_root, &path);
2144                 if (ret < 0)
2145                         goto err;
2146                 if (ret > 0)
2147                         break;
2148
2149                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
2150                 btrfs_release_path(&path);
2151         }
2152         btrfs_release_path(&path);
2153
2154         /* fixup the system chunk array in super block */
2155         btrfs_set_super_sys_array_size(info->super_copy, 0);
2156
2157         key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
2158         key.offset = 0;
2159         key.type = BTRFS_CHUNK_ITEM_KEY;
2160
2161         ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 0);
2162         if (ret < 0)
2163                 goto err;
2164         BUG_ON(ret != 0);
2165         while(1) {
2166                 leaf = path.nodes[0];
2167                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2168                         ret = btrfs_next_leaf(chunk_root, &path);
2169                         if (ret < 0)
2170                                 goto err;
2171                         if (ret > 0)
2172                                 break;
2173                         leaf = path.nodes[0];
2174                 }
2175                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2176                 if (key.type != BTRFS_CHUNK_ITEM_KEY)
2177                         goto next;
2178
2179                 ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
2180                 size = btrfs_item_size_nr(leaf, path.slots[0]);
2181                 BUG_ON(size != sizeof(chunk));
2182                 read_extent_buffer(leaf, &chunk, ptr, size);
2183                 type = btrfs_stack_chunk_type(&chunk);
2184
2185                 if (!(type & BTRFS_BLOCK_GROUP_SYSTEM))
2186                         goto next;
2187
2188                 ret = btrfs_add_system_chunk(trans, chunk_root, &key,
2189                                              &chunk, size);
2190                 if (ret)
2191                         goto err;
2192 next:
2193                 path.slots[0]++;
2194         }
2195
2196         ret = btrfs_commit_transaction(trans, root);
2197         BUG_ON(ret);
2198 err:
2199         btrfs_release_path(&path);
2200         return ret;
2201 }
2202
2203 static int do_convert(const char *devname, int datacsum, int packing,
2204                 int noxattr)
2205 {
2206         int i, ret;
2207         int fd = -1;
2208         u32 blocksize;
2209         u64 blocks[7];
2210         u64 total_bytes;
2211         u64 super_bytenr;
2212         ext2_filsys ext2_fs;
2213         struct btrfs_root *root;
2214         struct btrfs_root *ext2_root;
2215
2216         ret = open_ext2fs(devname, &ext2_fs);
2217         if (ret) {
2218                 fprintf(stderr, "unable to open the Ext2fs\n");
2219                 goto fail;
2220         }
2221         blocksize = ext2_fs->blocksize;
2222         total_bytes = (u64)ext2_fs->super->s_blocks_count * blocksize;
2223         if (blocksize < 4096) {
2224                 fprintf(stderr, "block size is too small\n");
2225                 goto fail;
2226         }
2227         if (!(ext2_fs->super->s_feature_incompat &
2228               EXT2_FEATURE_INCOMPAT_FILETYPE)) {
2229                 fprintf(stderr, "filetype feature is missing\n");
2230                 goto fail;
2231         }
2232         for (i = 0; i < 7; i++) {
2233                 ret = ext2_alloc_block(ext2_fs, 0, blocks + i);
2234                 if (ret) {
2235                         fprintf(stderr, "not enough free space\n");
2236                         goto fail;
2237                 }
2238                 blocks[i] *= blocksize;
2239         }
2240         super_bytenr = blocks[0];
2241         fd = open(devname, O_RDWR);
2242         if (fd < 0) {
2243                 fprintf(stderr, "unable to open %s\n", devname);
2244                 goto fail;
2245         }
2246         ret = make_btrfs(fd, devname, ext2_fs->super->s_volume_name,
2247                          blocks, total_bytes, blocksize, blocksize,
2248                          blocksize, blocksize, 0);
2249         if (ret) {
2250                 fprintf(stderr, "unable to create initial ctree: %s\n",
2251                         strerror(-ret));
2252                 goto fail;
2253         }
2254         /* create a system chunk that maps the whole device */
2255         ret = prepare_system_chunk(fd, super_bytenr, blocksize);
2256         if (ret) {
2257                 fprintf(stderr, "unable to update system chunk\n");
2258                 goto fail;
2259         }
2260         root = open_ctree_fd(fd, devname, super_bytenr, OPEN_CTREE_WRITES);
2261         if (!root) {
2262                 fprintf(stderr, "unable to open ctree\n");
2263                 goto fail;
2264         }
2265         ret = cache_free_extents(root, ext2_fs);
2266         if (ret) {
2267                 fprintf(stderr, "error during cache_free_extents %d\n", ret);
2268                 goto fail;
2269         }
2270         root->fs_info->extent_ops = &extent_ops;
2271         /* recover block allocation bitmap */
2272         for (i = 0; i < 7; i++) {
2273                 blocks[i] /= blocksize;
2274                 ext2_free_block(ext2_fs, blocks[i]);
2275         }
2276         ret = init_btrfs(root);
2277         if (ret) {
2278                 fprintf(stderr, "unable to setup the root tree\n");
2279                 goto fail;
2280         }
2281         printf("creating btrfs metadata.\n");
2282         ret = copy_inodes(root, ext2_fs, datacsum, packing, noxattr);
2283         if (ret) {
2284                 fprintf(stderr, "error during copy_inodes %d\n", ret);
2285                 goto fail;
2286         }
2287         printf("creating ext2fs image file.\n");
2288         ext2_root = link_subvol(root, "ext2_saved", EXT2_IMAGE_SUBVOL_OBJECTID);
2289         if (!ext2_root) {
2290                 fprintf(stderr, "unable to create subvol\n");
2291                 goto fail;
2292         }
2293         ret = create_ext2_image(ext2_root, ext2_fs, "image");
2294         if (ret) {
2295                 fprintf(stderr, "error during create_ext2_image %d\n", ret);
2296                 goto fail;
2297         }
2298         printf("cleaning up system chunk.\n");
2299         ret = cleanup_sys_chunk(root, ext2_root);
2300         if (ret) {
2301                 fprintf(stderr, "error during cleanup_sys_chunk %d\n", ret);
2302                 goto fail;
2303         }
2304         ret = close_ctree(root);
2305         if (ret) {
2306                 fprintf(stderr, "error during close_ctree %d\n", ret);
2307                 goto fail;
2308         }
2309         close_ext2fs(ext2_fs);
2310
2311         /*
2312          * If this step succeed, we get a mountable btrfs. Otherwise
2313          * the ext2fs is left unchanged.
2314          */
2315         ret = migrate_super_block(fd, super_bytenr, blocksize);
2316         if (ret) {
2317                 fprintf(stderr, "unable to migrate super block\n");
2318                 goto fail;
2319         }
2320
2321         root = open_ctree_fd(fd, devname, 0, OPEN_CTREE_WRITES);
2322         if (!root) {
2323                 fprintf(stderr, "unable to open ctree\n");
2324                 goto fail;
2325         }
2326         /* move chunk tree into system chunk. */
2327         ret = fixup_chunk_mapping(root);
2328         if (ret) {
2329                 fprintf(stderr, "error during fixup_chunk_tree\n");
2330                 goto fail;
2331         }
2332         ret = close_ctree(root);
2333         close(fd);
2334
2335         printf("conversion complete.\n");
2336         return 0;
2337 fail:
2338         if (fd != -1)
2339                 close(fd);
2340         fprintf(stderr, "conversion aborted.\n");
2341         return -1;
2342 }
2343
2344 static int may_rollback(struct btrfs_root *root)
2345 {
2346         struct btrfs_fs_info *info = root->fs_info;
2347         struct btrfs_multi_bio *multi = NULL;
2348         u64 bytenr;
2349         u64 length;
2350         u64 physical;
2351         u64 total_bytes;
2352         int num_stripes;
2353         int ret;
2354
2355         if (btrfs_super_num_devices(info->super_copy) != 1)
2356                 goto fail;
2357
2358         bytenr = BTRFS_SUPER_INFO_OFFSET;
2359         total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
2360
2361         while (1) {
2362                 ret = btrfs_map_block(&info->mapping_tree, WRITE, bytenr,
2363                                       &length, &multi, 0, NULL);
2364                 if (ret)
2365                         goto fail;
2366
2367                 num_stripes = multi->num_stripes;
2368                 physical = multi->stripes[0].physical;
2369                 kfree(multi);
2370
2371                 if (num_stripes != 1 || physical != bytenr)
2372                         goto fail;
2373
2374                 bytenr += length;
2375                 if (bytenr >= total_bytes)
2376                         break;
2377         }
2378         return 0;
2379 fail:
2380         return -1;
2381 }
2382
2383 static int do_rollback(const char *devname, int force)
2384 {
2385         int fd = -1;
2386         int ret;
2387         int i;
2388         struct btrfs_root *root;
2389         struct btrfs_root *ext2_root;
2390         struct btrfs_root *chunk_root;
2391         struct btrfs_dir_item *dir;
2392         struct btrfs_inode_item *inode;
2393         struct btrfs_file_extent_item *fi;
2394         struct btrfs_trans_handle *trans;
2395         struct extent_buffer *leaf;
2396         struct btrfs_block_group_cache *cache1;
2397         struct btrfs_block_group_cache *cache2;
2398         struct btrfs_key key;
2399         struct btrfs_path path;
2400         struct extent_io_tree io_tree;
2401         char *buf = NULL;
2402         char *name;
2403         u64 bytenr;
2404         u64 num_bytes;
2405         u64 root_dir;
2406         u64 objectid;
2407         u64 offset;
2408         u64 start;
2409         u64 end;
2410         u64 sb_bytenr;
2411         u64 first_free;
2412         u64 total_bytes;
2413         u32 sectorsize;
2414
2415         extent_io_tree_init(&io_tree);
2416
2417         fd = open(devname, O_RDWR);
2418         if (fd < 0) {
2419                 fprintf(stderr, "unable to open %s\n", devname);
2420                 goto fail;
2421         }
2422         root = open_ctree_fd(fd, devname, 0, OPEN_CTREE_WRITES);
2423         if (!root) {
2424                 fprintf(stderr, "unable to open ctree\n");
2425                 goto fail;
2426         }
2427         ret = may_rollback(root);
2428         if (ret < 0) {
2429                 fprintf(stderr, "unable to do rollback\n");
2430                 goto fail;
2431         }
2432
2433         sectorsize = root->sectorsize;
2434         buf = malloc(sectorsize);
2435         if (!buf) {
2436                 fprintf(stderr, "unable to allocate memory\n");
2437                 goto fail;
2438         }
2439
2440         btrfs_init_path(&path);
2441
2442         key.objectid = EXT2_IMAGE_SUBVOL_OBJECTID;
2443         key.type = BTRFS_ROOT_ITEM_KEY;
2444         key.offset = (u64)-1;
2445         ext2_root = btrfs_read_fs_root(root->fs_info, &key);
2446         if (!ext2_root || IS_ERR(ext2_root)) {
2447                 fprintf(stderr, "unable to open subvol %llu\n",
2448                         key.objectid);
2449                 goto fail;
2450         }
2451
2452         name = "image";
2453         root_dir = btrfs_root_dirid(&root->root_item);
2454         dir = btrfs_lookup_dir_item(NULL, ext2_root, &path,
2455                                    root_dir, name, strlen(name), 0);
2456         if (!dir || IS_ERR(dir)) {
2457                 fprintf(stderr, "unable to find file %s\n", name);
2458                 goto fail;
2459         }
2460         leaf = path.nodes[0];
2461         btrfs_dir_item_key_to_cpu(leaf, dir, &key);
2462         btrfs_release_path(&path);
2463
2464         objectid = key.objectid;
2465
2466         ret = btrfs_lookup_inode(NULL, ext2_root, &path, &key, 0);
2467         if (ret) {
2468                 fprintf(stderr, "unable to find inode item\n");
2469                 goto fail;
2470         }
2471         leaf = path.nodes[0];
2472         inode = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_inode_item);
2473         total_bytes = btrfs_inode_size(leaf, inode);
2474         btrfs_release_path(&path);
2475
2476         key.objectid = objectid;
2477         key.offset = 0;
2478         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
2479         ret = btrfs_search_slot(NULL, ext2_root, &key, &path, 0, 0);
2480         if (ret != 0) {
2481                 fprintf(stderr, "unable to find first file extent\n");
2482                 btrfs_release_path(&path);
2483                 goto fail;
2484         }
2485
2486         /* build mapping tree for the relocated blocks */
2487         for (offset = 0; offset < total_bytes; ) {
2488                 leaf = path.nodes[0];
2489                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2490                         ret = btrfs_next_leaf(root, &path);
2491                         if (ret != 0)
2492                                 break;  
2493                         continue;
2494                 }
2495
2496                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2497                 if (key.objectid != objectid || key.offset != offset ||
2498                     btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2499                         break;
2500
2501                 fi = btrfs_item_ptr(leaf, path.slots[0],
2502                                     struct btrfs_file_extent_item);
2503                 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
2504                         break;
2505                 if (btrfs_file_extent_compression(leaf, fi) ||
2506                     btrfs_file_extent_encryption(leaf, fi) ||
2507                     btrfs_file_extent_other_encoding(leaf, fi))
2508                         break;
2509
2510                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2511                 /* skip holes and direct mapped extents */
2512                 if (bytenr == 0 || bytenr == offset)
2513                         goto next_extent;
2514
2515                 bytenr += btrfs_file_extent_offset(leaf, fi);
2516                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
2517
2518                 cache1 = btrfs_lookup_block_group(root->fs_info, offset);
2519                 cache2 =  btrfs_lookup_block_group(root->fs_info,
2520                                                    offset + num_bytes - 1);
2521                 if (!cache1 || cache1 != cache2 ||
2522                     (!(cache1->flags & BTRFS_BLOCK_GROUP_SYSTEM) &&
2523                      !intersect_with_sb(offset, num_bytes)))
2524                         break;
2525
2526                 set_extent_bits(&io_tree, offset, offset + num_bytes - 1,
2527                                 EXTENT_LOCKED, GFP_NOFS);
2528                 set_state_private(&io_tree, offset, bytenr);
2529 next_extent:
2530                 offset += btrfs_file_extent_num_bytes(leaf, fi);
2531                 path.slots[0]++;
2532         }
2533         btrfs_release_path(&path);
2534
2535         if (offset < total_bytes) {
2536                 fprintf(stderr, "unable to build extent mapping\n");
2537                 goto fail;
2538         }
2539
2540         first_free = BTRFS_SUPER_INFO_OFFSET + 2 * sectorsize - 1;
2541         first_free &= ~((u64)sectorsize - 1);
2542         /* backup for extent #0 should exist */
2543         if(!test_range_bit(&io_tree, 0, first_free - 1, EXTENT_LOCKED, 1)) {
2544                 fprintf(stderr, "no backup for the first extent\n");
2545                 goto fail;
2546         }
2547         /* force no allocation from system block group */
2548         root->fs_info->system_allocs = -1;
2549         trans = btrfs_start_transaction(root, 1);
2550         BUG_ON(!trans);
2551         /*
2552          * recow the whole chunk tree, this will remove all chunk tree blocks
2553          * from system block group
2554          */
2555         chunk_root = root->fs_info->chunk_root;
2556         memset(&key, 0, sizeof(key));
2557         while (1) {
2558                 ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 1);
2559                 if (ret < 0)
2560                         break;
2561
2562                 ret = btrfs_next_leaf(chunk_root, &path);
2563                 if (ret)
2564                         break;
2565
2566                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
2567                 btrfs_release_path(&path);
2568         }
2569         btrfs_release_path(&path);
2570
2571         offset = 0;
2572         num_bytes = 0;
2573         while(1) {
2574                 cache1 = btrfs_lookup_block_group(root->fs_info, offset);
2575                 if (!cache1)
2576                         break;
2577
2578                 if (cache1->flags & BTRFS_BLOCK_GROUP_SYSTEM)
2579                         num_bytes += btrfs_block_group_used(&cache1->item);
2580
2581                 offset = cache1->key.objectid + cache1->key.offset;
2582         }
2583         /* only extent #0 left in system block group? */
2584         if (num_bytes > first_free) {
2585                 fprintf(stderr, "unable to empty system block group\n");
2586                 goto fail;
2587         }
2588         /* create a system chunk that maps the whole device */
2589         ret = prepare_system_chunk_sb(root->fs_info->super_copy);
2590         if (ret) {
2591                 fprintf(stderr, "unable to update system chunk\n");
2592                 goto fail;
2593         }
2594
2595         ret = btrfs_commit_transaction(trans, root);
2596         BUG_ON(ret);
2597
2598         ret = close_ctree(root);
2599         if (ret) {
2600                 fprintf(stderr, "error during close_ctree %d\n", ret);
2601                 goto fail;
2602         }
2603
2604         /* zero btrfs super block mirrors */
2605         memset(buf, 0, sectorsize);
2606         for (i = 1 ; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2607                 bytenr = btrfs_sb_offset(i);
2608                 if (bytenr >= total_bytes)
2609                         break;
2610                 ret = pwrite(fd, buf, sectorsize, bytenr);
2611         }
2612
2613         sb_bytenr = (u64)-1;
2614         /* copy all relocated blocks back */
2615         while(1) {
2616                 ret = find_first_extent_bit(&io_tree, 0, &start, &end,
2617                                             EXTENT_LOCKED);
2618                 if (ret)
2619                         break;
2620
2621                 ret = get_state_private(&io_tree, start, &bytenr);
2622                 BUG_ON(ret);
2623
2624                 clear_extent_bits(&io_tree, start, end, EXTENT_LOCKED,
2625                                   GFP_NOFS);
2626
2627                 while (start <= end) {
2628                         if (start == BTRFS_SUPER_INFO_OFFSET) {
2629                                 sb_bytenr = bytenr;
2630                                 goto next_sector;
2631                         }
2632                         ret = pread(fd, buf, sectorsize, bytenr);
2633                         if (ret < 0) {
2634                                 fprintf(stderr, "error during pread %d\n", ret);
2635                                 goto fail;
2636                         }
2637                         BUG_ON(ret != sectorsize);
2638                         ret = pwrite(fd, buf, sectorsize, start);
2639                         if (ret < 0) {
2640                                 fprintf(stderr, "error during pwrite %d\n", ret);
2641                                 goto fail;
2642                         }
2643                         BUG_ON(ret != sectorsize);
2644 next_sector:
2645                         start += sectorsize;
2646                         bytenr += sectorsize;
2647                 }
2648         }
2649
2650         ret = fsync(fd);
2651         if (ret) {
2652                 fprintf(stderr, "error during fsync %d\n", ret);
2653                 goto fail;
2654         }
2655         /*
2656          * finally, overwrite btrfs super block.
2657          */
2658         ret = pread(fd, buf, sectorsize, sb_bytenr);
2659         if (ret < 0) {
2660                 fprintf(stderr, "error during pread %d\n", ret);
2661                 goto fail;
2662         }
2663         BUG_ON(ret != sectorsize);
2664         ret = pwrite(fd, buf, sectorsize, BTRFS_SUPER_INFO_OFFSET);
2665         if (ret < 0) {
2666                 fprintf(stderr, "error during pwrite %d\n", ret);
2667                 goto fail;
2668         }
2669         BUG_ON(ret != sectorsize);
2670         ret = fsync(fd);
2671         if (ret) {
2672                 fprintf(stderr, "error during fsync %d\n", ret);
2673                 goto fail;
2674         }
2675
2676         close(fd);
2677         free(buf);
2678         extent_io_tree_cleanup(&io_tree);
2679         printf("rollback complete.\n");
2680         return 0;
2681
2682 fail:
2683         if (fd != -1)
2684                 close(fd);
2685         free(buf);
2686         fprintf(stderr, "rollback aborted.\n");
2687         return -1;
2688 }
2689
/* Write the command synopsis and the per-option help lines to stdout. */
static void print_usage(void)
{
        static const char *const usage_lines[] = {
                "usage: btrfs-convert [-d] [-i] [-n] [-r] device\n",
                "\t-d disable data checksum\n",
                "\t-i ignore xattrs and ACLs\n",
                "\t-n disable packing of small files\n",
                "\t-r roll back to ext2fs\n",
        };
        size_t idx;

        for (idx = 0; idx < sizeof(usage_lines) / sizeof(usage_lines[0]); idx++)
                fputs(usage_lines[idx], stdout);
}
2698
2699 int main(int argc, char *argv[])
2700 {
2701         int ret;
2702         int packing = 1;
2703         int noxattr = 0;
2704         int datacsum = 1;
2705         int rollback = 0;
2706         char *file;
2707         while(1) {
2708                 int c = getopt(argc, argv, "dinr");
2709                 if (c < 0)
2710                         break;
2711                 switch(c) {
2712                         case 'd':
2713                                 datacsum = 0;
2714                                 break;
2715                         case 'i':
2716                                 noxattr = 1;
2717                                 break;
2718                         case 'n':
2719                                 packing = 0;
2720                                 break;
2721                         case 'r':
2722                                 rollback = 1;
2723                                 break;
2724                         default:
2725                                 print_usage();
2726                                 return 1;
2727                 }
2728         }
2729         argc = argc - optind;
2730         if (argc != 1) {
2731                 print_usage();
2732                 return 1;
2733         }
2734
2735         file = argv[optind];
2736         if (check_mounted(file)) {
2737                 fprintf(stderr, "%s is mounted\n", file);
2738                 return 1;
2739         }
2740
2741         if (rollback) {
2742                 ret = do_rollback(file, 0);
2743         } else {
2744                 ret = do_convert(file, datacsum, packing, noxattr);
2745         }
2746         if (ret)
2747                 return 1;
2748         return 0;
2749 }