btrfs-progs: check link_subvol name base
[platform/upstream/btrfs-progs.git] / btrfs-convert.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #define _XOPEN_SOURCE 600
20 #define _GNU_SOURCE 1
21
22 #include "kerncompat.h"
23
24 #include <sys/ioctl.h>
25 #include <sys/mount.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <sys/acl.h>
31 #include <fcntl.h>
32 #include <unistd.h>
33 #include <uuid/uuid.h>
34
35 #include "ctree.h"
36 #include "disk-io.h"
37 #include "volumes.h"
38 #include "transaction.h"
39 #include "crc32c.h"
40 #include "utils.h"
41 #include <ext2fs/ext2_fs.h>
42 #include <ext2fs/ext2fs.h>
43 #include <ext2fs/ext2_ext_attr.h>
44
45 #define INO_OFFSET (BTRFS_FIRST_FREE_OBJECTID - EXT2_ROOT_INO)
46 #define STRIPE_LEN (64 * 1024)
47 #define EXT2_IMAGE_SUBVOL_OBJECTID BTRFS_FIRST_FREE_OBJECTID
48
49 /*
50  * Open Ext2fs in readonly mode, read block allocation bitmap and
51  * inode bitmap into memory.
52  */
53 static int open_ext2fs(const char *name, ext2_filsys *ret_fs)
54 {
55         errcode_t ret;
56         ext2_filsys ext2_fs;
57         ext2_ino_t ino;
58         ret = ext2fs_open(name, 0, 0, 0, unix_io_manager, &ext2_fs);
59         if (ret) {
60                 fprintf(stderr, "ext2fs_open: %s\n", error_message(ret));
61                 goto fail;
62         }
63         ret = ext2fs_read_inode_bitmap(ext2_fs);
64         if (ret) {
65                 fprintf(stderr, "ext2fs_read_inode_bitmap: %s\n",
66                         error_message(ret));
67                 goto fail;
68         }
69         ret = ext2fs_read_block_bitmap(ext2_fs);
70         if (ret) {
71                 fprintf(stderr, "ext2fs_read_block_bitmap: %s\n",
72                         error_message(ret));
73                 goto fail;
74         }
75         /*
76          * search each block group for a free inode. this set up
77          * uninit block/inode bitmaps appropriately.
78          */
79         ino = 1;
80         while (ino <= ext2_fs->super->s_inodes_count) {
81                 ext2_ino_t foo;
82                 ext2fs_new_inode(ext2_fs, ino, 0, NULL, &foo);
83                 ino += EXT2_INODES_PER_GROUP(ext2_fs->super);
84         }
85
86         *ret_fs = ext2_fs;
87         return 0;
88 fail:
89         return -1;
90 }
91
92 static int close_ext2fs(ext2_filsys fs)
93 {
94         ext2fs_close(fs);
95         return 0;
96 }
97
98 static int ext2_alloc_block(ext2_filsys fs, u64 goal, u64 *block_ret)
99 {
100         blk_t block;
101
102         if (!ext2fs_new_block(fs, goal, NULL, &block)) {
103                 ext2fs_fast_mark_block_bitmap(fs->block_map, block);
104                 *block_ret = block;
105                 return 0;
106         }
107         return -ENOSPC;
108 }
109
110 static int ext2_free_block(ext2_filsys fs, u64 block)
111 {
112         BUG_ON(block != (blk_t)block);
113         ext2fs_fast_unmark_block_bitmap(fs->block_map, block);
114         return 0;
115 }
116
117 static int cache_free_extents(struct btrfs_root *root, ext2_filsys ext2_fs)
118
119 {
120         int i, ret = 0;
121         blk_t block;
122         u64 bytenr;
123         u64 blocksize = ext2_fs->blocksize;
124
125         block = ext2_fs->super->s_first_data_block;
126         for (; block < ext2_fs->super->s_blocks_count; block++) {
127                 if (ext2fs_fast_test_block_bitmap(ext2_fs->block_map, block))
128                         continue;
129                 bytenr = block * blocksize;
130                 ret = set_extent_dirty(&root->fs_info->free_space_cache,
131                                        bytenr, bytenr + blocksize - 1, 0);
132                 BUG_ON(ret);
133         }
134
135         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
136                 bytenr = btrfs_sb_offset(i);
137                 bytenr &= ~((u64)STRIPE_LEN - 1);
138                 if (bytenr >= blocksize * ext2_fs->super->s_blocks_count)
139                         break;
140                 clear_extent_dirty(&root->fs_info->free_space_cache, bytenr,
141                                    bytenr + STRIPE_LEN - 1, 0);
142         }
143
144         clear_extent_dirty(&root->fs_info->free_space_cache,
145                            0, BTRFS_SUPER_INFO_OFFSET - 1, 0);
146
147         return 0;
148 }
149
150 static int custom_alloc_extent(struct btrfs_root *root, u64 num_bytes,
151                                u64 hint_byte, struct btrfs_key *ins)
152 {
153         u64 start;
154         u64 end;
155         u64 last = hint_byte;
156         int ret;
157         int wrapped = 0;
158         struct btrfs_block_group_cache *cache;
159
160         while(1) {
161                 ret = find_first_extent_bit(&root->fs_info->free_space_cache,
162                                             last, &start, &end, EXTENT_DIRTY);
163                 if (ret) {
164                         if (wrapped++ == 0) {
165                                 last = 0;
166                                 continue;
167                         } else {
168                                 goto fail;
169                         }
170                 }
171
172                 start = max(last, start);
173                 last = end + 1;
174                 if (last - start < num_bytes)
175                         continue;
176
177                 last = start + num_bytes;
178                 if (test_range_bit(&root->fs_info->pinned_extents,
179                                    start, last - 1, EXTENT_DIRTY, 0))
180                         continue;
181
182                 cache = btrfs_lookup_block_group(root->fs_info, start);
183                 BUG_ON(!cache);
184                 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM ||
185                     last > cache->key.objectid + cache->key.offset) {
186                         last = cache->key.objectid + cache->key.offset;
187                         continue;
188                 }
189
190                 clear_extent_dirty(&root->fs_info->free_space_cache,
191                                    start, start + num_bytes - 1, 0);
192
193                 ins->objectid = start;
194                 ins->offset = num_bytes;
195                 ins->type = BTRFS_EXTENT_ITEM_KEY;
196                 return 0;
197         }
198 fail:
199         fprintf(stderr, "not enough free space\n");
200         return -ENOSPC;
201 }
202
203 static int intersect_with_sb(u64 bytenr, u64 num_bytes)
204 {
205         int i;
206         u64 offset;
207
208         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
209                 offset = btrfs_sb_offset(i);
210                 offset &= ~((u64)STRIPE_LEN - 1);
211
212                 if (bytenr < offset + STRIPE_LEN &&
213                     bytenr + num_bytes > offset)
214                         return 1;
215         }
216         return 0;
217 }
218
219 static int custom_free_extent(struct btrfs_root *root, u64 bytenr,
220                               u64 num_bytes)
221 {
222         return intersect_with_sb(bytenr, num_bytes);
223 }
224
225 static struct btrfs_extent_ops extent_ops = {
226         .alloc_extent = custom_alloc_extent,
227         .free_extent = custom_free_extent,
228 };
229
230 struct dir_iterate_data {
231         struct btrfs_trans_handle *trans;
232         struct btrfs_root *root;
233         struct btrfs_inode_item *inode;
234         u64 objectid;
235         u64 index_cnt;
236         u64 parent;
237         int errcode;
238 };
239
240 static u8 filetype_conversion_table[EXT2_FT_MAX] = {
241         [EXT2_FT_UNKNOWN]       = BTRFS_FT_UNKNOWN,
242         [EXT2_FT_REG_FILE]      = BTRFS_FT_REG_FILE,
243         [EXT2_FT_DIR]           = BTRFS_FT_DIR,
244         [EXT2_FT_CHRDEV]        = BTRFS_FT_CHRDEV,
245         [EXT2_FT_BLKDEV]        = BTRFS_FT_BLKDEV,
246         [EXT2_FT_FIFO]          = BTRFS_FT_FIFO,
247         [EXT2_FT_SOCK]          = BTRFS_FT_SOCK,
248         [EXT2_FT_SYMLINK]       = BTRFS_FT_SYMLINK,
249 };
250
251 static int dir_iterate_proc(ext2_ino_t dir, int entry,
252                             struct ext2_dir_entry *dirent,
253                             int offset, int blocksize,
254                             char *buf,void *priv_data)
255 {
256         int ret;
257         int file_type;
258         u64 objectid;
259         u64 inode_size;
260         char dotdot[] = "..";
261         struct btrfs_key location;
262         struct dir_iterate_data *idata = (struct dir_iterate_data *)priv_data;
263         int name_len;
264
265         name_len = dirent->name_len & 0xFF;
266
267         objectid = dirent->inode + INO_OFFSET;
268         if (!strncmp(dirent->name, dotdot, name_len)) {
269                 if (name_len == 2) {
270                         BUG_ON(idata->parent != 0);
271                         idata->parent = objectid;
272                 }
273                 return 0;
274         }
275         if (dirent->inode < EXT2_GOOD_OLD_FIRST_INO)
276                 return 0;
277
278         location.objectid = objectid;
279         location.offset = 0;
280         btrfs_set_key_type(&location, BTRFS_INODE_ITEM_KEY);
281
282         file_type = dirent->name_len >> 8;
283         BUG_ON(file_type > EXT2_FT_SYMLINK);
284         ret = btrfs_insert_dir_item(idata->trans, idata->root,
285                                     dirent->name, name_len,
286                                     idata->objectid, &location,
287                                     filetype_conversion_table[file_type],
288                                     idata->index_cnt);
289         if (ret)
290                 goto fail;
291         ret = btrfs_insert_inode_ref(idata->trans, idata->root,
292                                      dirent->name, name_len,
293                                      objectid, idata->objectid,
294                                      idata->index_cnt);
295         if (ret)
296                 goto fail;
297         idata->index_cnt++;
298         inode_size = btrfs_stack_inode_size(idata->inode) +
299                      name_len * 2;
300         btrfs_set_stack_inode_size(idata->inode, inode_size);
301         return 0;
302 fail:
303         idata->errcode = ret;
304         return BLOCK_ABORT;
305 }
306
307 static int create_dir_entries(struct btrfs_trans_handle *trans,
308                               struct btrfs_root *root, u64 objectid,
309                               struct btrfs_inode_item *btrfs_inode,
310                               ext2_filsys ext2_fs, ext2_ino_t ext2_ino)
311 {
312         int ret;
313         errcode_t err;
314         struct dir_iterate_data data = {
315                 .trans          = trans,
316                 .root           = root,
317                 .inode          = btrfs_inode,
318                 .objectid       = objectid,
319                 .index_cnt      = 2,
320                 .parent         = 0,
321                 .errcode        = 0,
322         };
323
324         err = ext2fs_dir_iterate2(ext2_fs, ext2_ino, 0, NULL,
325                                   dir_iterate_proc, &data);
326         if (err)
327                 goto error;
328         ret = data.errcode;
329         if (ret == 0 && data.parent == objectid) {
330                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
331                                              objectid, objectid, 0);
332         }
333         return ret;
334 error:
335         fprintf(stderr, "ext2fs_dir_iterate2: %s\n", error_message(err));
336         return -1;
337 }
338
339 static int read_disk_extent(struct btrfs_root *root, u64 bytenr,
340                             u32 num_bytes, char *buffer)
341 {
342         int ret;
343         struct btrfs_fs_devices *fs_devs = root->fs_info->fs_devices;
344
345         ret = pread(fs_devs->latest_bdev, buffer, num_bytes, bytenr);
346         if (ret != num_bytes)
347                 goto fail;
348         ret = 0;
349 fail:
350         if (ret > 0)
351                 ret = -1;
352         return ret;
353 }
354 /*
355  * Record a file extent. Do all the required works, such as inserting
356  * file extent item, inserting extent item and backref item into extent
357  * tree and updating block accounting.
358  */
359 static int record_file_extent(struct btrfs_trans_handle *trans,
360                               struct btrfs_root *root, u64 objectid,
361                               struct btrfs_inode_item *inode,
362                               u64 file_pos, u64 disk_bytenr,
363                               u64 num_bytes, int checksum)
364 {
365         int ret;
366         struct btrfs_fs_info *info = root->fs_info;
367         struct btrfs_root *extent_root = info->extent_root;
368         struct extent_buffer *leaf;
369         struct btrfs_file_extent_item *fi;
370         struct btrfs_key ins_key;
371         struct btrfs_path path;
372         struct btrfs_extent_item *ei;
373         u32 blocksize = root->sectorsize;
374         u64 nbytes;
375
376         if (disk_bytenr == 0) {
377                 ret = btrfs_insert_file_extent(trans, root, objectid,
378                                                 file_pos, disk_bytenr,
379                                                 num_bytes, num_bytes);
380                 return ret;
381         }
382
383         btrfs_init_path(&path);
384
385         if (checksum) {
386                 u64 offset;
387                 char *buffer;
388
389                 ret = -ENOMEM;
390                 buffer = malloc(blocksize);
391                 if (!buffer)
392                         goto fail;
393                 for (offset = 0; offset < num_bytes; offset += blocksize) {
394                         ret = read_disk_extent(root, disk_bytenr + offset,
395                                                 blocksize, buffer);
396                         if (ret)
397                                 break;
398                         ret = btrfs_csum_file_block(trans,
399                                                     root->fs_info->csum_root,
400                                                     disk_bytenr + num_bytes,
401                                                     disk_bytenr + offset,
402                                                     buffer, blocksize);
403                         if (ret)
404                                 break;
405                 }
406                 free(buffer);
407                 if (ret)
408                         goto fail;
409         }
410
411         ins_key.objectid = objectid;
412         ins_key.offset = file_pos;
413         btrfs_set_key_type(&ins_key, BTRFS_EXTENT_DATA_KEY);
414         ret = btrfs_insert_empty_item(trans, root, &path, &ins_key,
415                                       sizeof(*fi));
416         if (ret)
417                 goto fail;
418         leaf = path.nodes[0];
419         fi = btrfs_item_ptr(leaf, path.slots[0],
420                             struct btrfs_file_extent_item);
421         btrfs_set_file_extent_generation(leaf, fi, trans->transid);
422         btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
423         btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr);
424         btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes);
425         btrfs_set_file_extent_offset(leaf, fi, 0);
426         btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
427         btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
428         btrfs_set_file_extent_compression(leaf, fi, 0);
429         btrfs_set_file_extent_encryption(leaf, fi, 0);
430         btrfs_set_file_extent_other_encoding(leaf, fi, 0);
431         btrfs_mark_buffer_dirty(leaf);
432
433         nbytes = btrfs_stack_inode_nbytes(inode) + num_bytes;
434         btrfs_set_stack_inode_nbytes(inode, nbytes);
435
436         btrfs_release_path(&path);
437
438         ins_key.objectid = disk_bytenr;
439         ins_key.offset = num_bytes;
440         ins_key.type = BTRFS_EXTENT_ITEM_KEY;
441
442         ret = btrfs_insert_empty_item(trans, extent_root, &path,
443                                       &ins_key, sizeof(*ei));
444         if (ret == 0) {
445                 leaf = path.nodes[0];
446                 ei = btrfs_item_ptr(leaf, path.slots[0],
447                                     struct btrfs_extent_item);
448
449                 btrfs_set_extent_refs(leaf, ei, 0);
450                 btrfs_set_extent_generation(leaf, ei, 0);
451                 btrfs_set_extent_flags(leaf, ei, BTRFS_EXTENT_FLAG_DATA);
452
453                 btrfs_mark_buffer_dirty(leaf);
454
455                 ret = btrfs_update_block_group(trans, root, disk_bytenr,
456                                                num_bytes, 1, 0);
457                 if (ret)
458                         goto fail;
459         } else if (ret != -EEXIST) {
460                 goto fail;
461         }
462         btrfs_extent_post_op(trans, extent_root);
463
464         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, 0,
465                                    root->root_key.objectid,
466                                    objectid, file_pos);
467         if (ret)
468                 goto fail;
469         ret = 0;
470 fail:
471         btrfs_release_path(&path);
472         return ret;
473 }
474
475 static int record_file_blocks(struct btrfs_trans_handle *trans,
476                               struct btrfs_root *root, u64 objectid,
477                               struct btrfs_inode_item *inode,
478                               u64 file_block, u64 disk_block,
479                               u64 num_blocks, int checksum)
480 {
481         u64 file_pos = file_block * root->sectorsize;
482         u64 disk_bytenr = disk_block * root->sectorsize;
483         u64 num_bytes = num_blocks * root->sectorsize;
484         return record_file_extent(trans, root, objectid, inode, file_pos,
485                                   disk_bytenr, num_bytes, checksum);
486 }
487
488 struct blk_iterate_data {
489         struct btrfs_trans_handle *trans;
490         struct btrfs_root *root;
491         struct btrfs_inode_item *inode;
492         u64 objectid;
493         u64 first_block;
494         u64 disk_block;
495         u64 num_blocks;
496         u64 boundary;
497         int checksum;
498         int errcode;
499 };
500
501 static int block_iterate_proc(ext2_filsys ext2_fs,
502                               u64 disk_block, u64 file_block,
503                               struct blk_iterate_data *idata)
504 {
505         int ret;
506         int sb_region;
507         int do_barrier;
508         struct btrfs_root *root = idata->root;
509         struct btrfs_trans_handle *trans = idata->trans;
510         struct btrfs_block_group_cache *cache;
511         u64 bytenr = disk_block * root->sectorsize;
512
513         sb_region = intersect_with_sb(bytenr, root->sectorsize);
514         do_barrier = sb_region || disk_block >= idata->boundary;
515         if ((idata->num_blocks > 0 && do_barrier) ||
516             (file_block > idata->first_block + idata->num_blocks) ||
517             (disk_block != idata->disk_block + idata->num_blocks)) {
518                 if (idata->num_blocks > 0) {
519                         ret = record_file_blocks(trans, root, idata->objectid,
520                                         idata->inode, idata->first_block,
521                                         idata->disk_block, idata->num_blocks,
522                                         idata->checksum);
523                         if (ret)
524                                 goto fail;
525                         idata->first_block += idata->num_blocks;
526                         idata->num_blocks = 0;
527                 }
528                 if (file_block > idata->first_block) {
529                         ret = record_file_blocks(trans, root, idata->objectid,
530                                         idata->inode, idata->first_block,
531                                         0, file_block - idata->first_block,
532                                         idata->checksum);
533                         if (ret)
534                                 goto fail;
535                 }
536
537                 if (sb_region) {
538                         bytenr += STRIPE_LEN - 1;
539                         bytenr &= ~((u64)STRIPE_LEN - 1);
540                 } else {
541                         cache = btrfs_lookup_block_group(root->fs_info, bytenr);
542                         BUG_ON(!cache);
543                         bytenr = cache->key.objectid + cache->key.offset;
544                 }
545
546                 idata->first_block = file_block;
547                 idata->disk_block = disk_block;
548                 idata->boundary = bytenr / root->sectorsize;
549         }
550         idata->num_blocks++;
551         return 0;
552 fail:
553         idata->errcode = ret;
554         return BLOCK_ABORT;
555 }
556
557 static int __block_iterate_proc(ext2_filsys fs, blk_t *blocknr,
558                                 e2_blkcnt_t blockcnt, blk_t ref_block,
559                                 int ref_offset, void *priv_data)
560 {
561         struct blk_iterate_data *idata;
562         idata = (struct blk_iterate_data *)priv_data;
563         return block_iterate_proc(fs, *blocknr, blockcnt, idata);
564 }
565
566 /*
567  * traverse file's data blocks, record these data blocks as file extents.
568  */
569 static int create_file_extents(struct btrfs_trans_handle *trans,
570                                struct btrfs_root *root, u64 objectid,
571                                struct btrfs_inode_item *btrfs_inode,
572                                ext2_filsys ext2_fs, ext2_ino_t ext2_ino,
573                                int datacsum, int packing)
574 {
575         int ret;
576         char *buffer = NULL;
577         errcode_t err;
578         u32 last_block;
579         u32 sectorsize = root->sectorsize;
580         u64 inode_size = btrfs_stack_inode_size(btrfs_inode);
581         struct blk_iterate_data data = {
582                 .trans          = trans,
583                 .root           = root,
584                 .inode          = btrfs_inode,
585                 .objectid       = objectid,
586                 .first_block    = 0,
587                 .disk_block     = 0,
588                 .num_blocks     = 0,
589                 .boundary       = (u64)-1,
590                 .checksum       = datacsum,
591                 .errcode        = 0,
592         };
593         err = ext2fs_block_iterate2(ext2_fs, ext2_ino, BLOCK_FLAG_DATA_ONLY,
594                                     NULL, __block_iterate_proc, &data);
595         if (err)
596                 goto error;
597         ret = data.errcode;
598         if (ret)
599                 goto fail;
600         if (packing && data.first_block == 0 && data.num_blocks > 0 &&
601             inode_size <= BTRFS_MAX_INLINE_DATA_SIZE(root)) {
602                 u64 num_bytes = data.num_blocks * sectorsize;
603                 u64 disk_bytenr = data.disk_block * sectorsize;
604                 u64 nbytes;
605
606                 buffer = malloc(num_bytes);
607                 if (!buffer)
608                         return -ENOMEM;
609                 ret = read_disk_extent(root, disk_bytenr, num_bytes, buffer);
610                 if (ret)
611                         goto fail;
612                 if (num_bytes > inode_size)
613                         num_bytes = inode_size;
614                 ret = btrfs_insert_inline_extent(trans, root, objectid,
615                                                  0, buffer, num_bytes);
616                 if (ret)
617                         goto fail;
618                 nbytes = btrfs_stack_inode_nbytes(btrfs_inode) + num_bytes;
619                 btrfs_set_stack_inode_nbytes(btrfs_inode, nbytes);
620         } else if (data.num_blocks > 0) {
621                 ret = record_file_blocks(trans, root, objectid, btrfs_inode,
622                                          data.first_block, data.disk_block,
623                                          data.num_blocks, data.checksum);
624                 if (ret)
625                         goto fail;
626         }
627         data.first_block += data.num_blocks;
628         last_block = (inode_size + sectorsize - 1) / sectorsize;
629         if (last_block > data.first_block) {
630                 ret = record_file_blocks(trans, root, objectid, btrfs_inode,
631                                          data.first_block, 0, last_block -
632                                          data.first_block, data.checksum);
633         }
634 fail:
635         if (buffer)
636                 free(buffer);
637         return ret;
638 error:
639         fprintf(stderr, "ext2fs_block_iterate2: %s\n", error_message(err));
640         return -1;
641 }
642
643 static int create_symbol_link(struct btrfs_trans_handle *trans,
644                               struct btrfs_root *root, u64 objectid,
645                               struct btrfs_inode_item *btrfs_inode,
646                               ext2_filsys ext2_fs, ext2_ino_t ext2_ino,
647                               struct ext2_inode *ext2_inode)
648 {
649         int ret;
650         char *pathname;
651         u64 inode_size = btrfs_stack_inode_size(btrfs_inode);
652         if (ext2fs_inode_data_blocks(ext2_fs, ext2_inode)) {
653                 btrfs_set_stack_inode_size(btrfs_inode, inode_size + 1);
654                 ret = create_file_extents(trans, root, objectid, btrfs_inode,
655                                           ext2_fs, ext2_ino, 1, 1);
656                 btrfs_set_stack_inode_size(btrfs_inode, inode_size);
657                 return ret;
658         }
659
660         pathname = (char *)&(ext2_inode->i_block[0]);
661         BUG_ON(pathname[inode_size] != 0);
662         ret = btrfs_insert_inline_extent(trans, root, objectid, 0,
663                                          pathname, inode_size + 1);
664         btrfs_set_stack_inode_nbytes(btrfs_inode, inode_size + 1);
665         return ret;
666 }
667
/*
 * The xattr/acl code below is based on fs/ext3/xattr.c and fs/ext3/acl.c.
 *
 * EXT2_XATTR_BHDR:   view a buffer as an xattr header.
 * EXT2_XATTR_BFIRST: first entry in such a buffer, located directly after
 *                    the header.
 * EXT2_XATTR_IHDR:   xattr header stored inside an inode, located
 *                    EXT2_GOOD_OLD_INODE_SIZE + i_extra_isize bytes into it.
 * EXT2_XATTR_IFIRST: first in-inode entry, located directly after the
 *                    h_magic field of the in-inode header.
 */
#define EXT2_XATTR_BHDR(ptr) \
	((struct ext2_ext_attr_header *)(ptr))
#define EXT2_XATTR_BFIRST(ptr) \
	((struct ext2_ext_attr_entry *)(EXT2_XATTR_BHDR(ptr) + 1))
#define EXT2_XATTR_IHDR(inode) \
	((struct ext2_ext_attr_header *) ((void *)(inode) + \
		EXT2_GOOD_OLD_INODE_SIZE + (inode)->i_extra_isize))
#define EXT2_XATTR_IFIRST(inode) \
	((struct ext2_ext_attr_entry *) ((void *)EXT2_XATTR_IHDR(inode) + \
		sizeof(EXT2_XATTR_IHDR(inode)->h_magic)))
681
/*
 * Follow the chain of xattr entries that starts at 'entry' up to the
 * entry recognised by EXT2_EXT_IS_LAST_ENTRY().
 *
 * Returns -EIO as soon as a successor entry would start at or beyond
 * 'end', and 0 when the last entry is reached first.
 */
static int ext2_xattr_check_names(struct ext2_ext_attr_entry *entry,
				  const void *end)
{
	struct ext2_ext_attr_entry *next;

	for (; !EXT2_EXT_IS_LAST_ENTRY(entry); entry = next) {
		next = EXT2_EXT_ATTR_NEXT(entry);
		if ((void *)next >= end)
			return -EIO;
	}
	return 0;
}
695
696 static int ext2_xattr_check_block(const char *buf, size_t size)
697 {
698         int error;
699         struct ext2_ext_attr_header *header = EXT2_XATTR_BHDR(buf);
700
701         if (header->h_magic != EXT2_EXT_ATTR_MAGIC ||
702             header->h_blocks != 1)
703                 return -EIO;
704         error = ext2_xattr_check_names(EXT2_XATTR_BFIRST(buf), buf + size);
705         return error;
706 }
707
708 static int ext2_xattr_check_entry(struct ext2_ext_attr_entry *entry,
709                                   size_t size)
710 {
711         size_t value_size = entry->e_value_size;
712
713         if (entry->e_value_block != 0 || value_size > size ||
714             entry->e_value_offs + value_size > size)
715                 return -EIO;
716         return 0;
717 }
718
719 #define EXT2_ACL_VERSION        0x0001
720
721 typedef struct {
722         __le16          e_tag;
723         __le16          e_perm;
724         __le32          e_id;
725 } ext2_acl_entry;
726
727 typedef struct {
728         __le16          e_tag;
729         __le16          e_perm;
730 } ext2_acl_entry_short;
731
732 typedef struct {
733         __le32          a_version;
734 } ext2_acl_header;
735
736 static inline int ext2_acl_count(size_t size)
737 {
738         ssize_t s;
739         size -= sizeof(ext2_acl_header);
740         s = size - 4 * sizeof(ext2_acl_entry_short);
741         if (s < 0) {
742                 if (size % sizeof(ext2_acl_entry_short))
743                         return -1;
744                 return size / sizeof(ext2_acl_entry_short);
745         } else {
746                 if (s % sizeof(ext2_acl_entry))
747                         return -1;
748                 return s / sizeof(ext2_acl_entry) + 4;
749         }
750 }
751
752 #define ACL_EA_VERSION          0x0002
753
754 typedef struct {
755         __le16          e_tag;
756         __le16          e_perm;
757         __le32          e_id;
758 } acl_ea_entry;
759
760 typedef struct {
761         __le32          a_version;
762         acl_ea_entry    a_entries[0];
763 } acl_ea_header;
764
765 static inline size_t acl_ea_size(int count)
766 {
767         return sizeof(acl_ea_header) + count * sizeof(acl_ea_entry);
768 }
769
770 static int ext2_acl_to_xattr(void *dst, const void *src,
771                              size_t dst_size, size_t src_size)
772 {
773         int i, count;
774         const void *end = src + src_size;
775         acl_ea_header *ext_acl = (acl_ea_header *)dst;
776         acl_ea_entry *dst_entry = ext_acl->a_entries;
777         ext2_acl_entry *src_entry;
778
779         if (src_size < sizeof(ext2_acl_header))
780                 goto fail;
781         if (((ext2_acl_header *)src)->a_version !=
782             cpu_to_le32(EXT2_ACL_VERSION))
783                 goto fail;
784         src += sizeof(ext2_acl_header);
785         count = ext2_acl_count(src_size);
786         if (count <= 0)
787                 goto fail;
788
789         BUG_ON(dst_size < acl_ea_size(count));
790         ext_acl->a_version = cpu_to_le32(ACL_EA_VERSION);
791         for (i = 0; i < count; i++, dst_entry++) {
792                 src_entry = (ext2_acl_entry *)src;
793                 if (src + sizeof(ext2_acl_entry_short) > end)
794                         goto fail;
795                 dst_entry->e_tag = src_entry->e_tag;
796                 dst_entry->e_perm = src_entry->e_perm;
797                 switch (le16_to_cpu(src_entry->e_tag)) {
798                 case ACL_USER_OBJ:
799                 case ACL_GROUP_OBJ:
800                 case ACL_MASK:
801                 case ACL_OTHER:
802                         src += sizeof(ext2_acl_entry_short);
803                         dst_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID);
804                         break;
805                 case ACL_USER:
806                 case ACL_GROUP:
807                         src += sizeof(ext2_acl_entry);
808                         if (src > end)
809                                 goto fail;
810                         dst_entry->e_id = src_entry->e_id;
811                         break;
812                 default:
813                         goto fail;
814                 }
815         }
816         if (src != end)
817                 goto fail;
818         return 0;
819 fail:
820         return -EINVAL;
821 }
822
/*
 * Name prefixes for ext2 extended attributes, indexed by the entry's
 * e_name_index.  Slots missing from the initializer are NULL, which
 * copy_single_xattr() rejects with -EOPNOTSUPP.  Slots 2 and 3 hold the
 * POSIX ACL attribute names; values stored under them are converted with
 * ext2_acl_to_xattr() before being written.
 */
static const char * const xattr_prefix_table[] = {
	[1] =	"user.",
	[2] =	"system.posix_acl_access",
	[3] =	"system.posix_acl_default",
	[4] =	"trusted.",
	[6] =	"security.",
};
830
831 static int copy_single_xattr(struct btrfs_trans_handle *trans,
832                              struct btrfs_root *root, u64 objectid,
833                              struct ext2_ext_attr_entry *entry,
834                              const void *data, u32 datalen)
835 {
836         int ret = 0;
837         int name_len;
838         int name_index;
839         void *databuf = NULL;
840         char namebuf[XATTR_NAME_MAX + 1];
841
842         name_index = entry->e_name_index;
843         if (name_index >= ARRAY_SIZE(xattr_prefix_table) ||
844             xattr_prefix_table[name_index] == NULL)
845                 return -EOPNOTSUPP;
846         name_len = strlen(xattr_prefix_table[name_index]) +
847                    entry->e_name_len;
848         if (name_len >= sizeof(namebuf))
849                 return -ERANGE;
850
851         if (name_index == 2 || name_index == 3) {
852                 size_t bufsize = acl_ea_size(ext2_acl_count(datalen));
853                 databuf = malloc(bufsize);
854                 if (!databuf)
855                        return -ENOMEM;
856                 ret = ext2_acl_to_xattr(databuf, data, bufsize, datalen);
857                 if (ret)
858                         goto out;
859                 data = databuf;
860                 datalen = bufsize;
861         }
862         strncpy(namebuf, xattr_prefix_table[name_index], XATTR_NAME_MAX);
863         strncat(namebuf, EXT2_EXT_ATTR_NAME(entry), entry->e_name_len);
864         if (name_len + datalen > BTRFS_LEAF_DATA_SIZE(root) -
865             sizeof(struct btrfs_item) - sizeof(struct btrfs_dir_item)) {
866                 fprintf(stderr, "skip large xattr on inode %Lu name %.*s\n",
867                         objectid - INO_OFFSET, name_len, namebuf);
868                 goto out;
869         }
870         ret = btrfs_insert_xattr_item(trans, root, namebuf, name_len,
871                                       data, datalen, objectid);
872 out:
873         if (databuf)
874                 free(databuf);
875         return ret;
876 }
877
878 static int copy_extended_attrs(struct btrfs_trans_handle *trans,
879                                struct btrfs_root *root, u64 objectid,
880                                struct btrfs_inode_item *btrfs_inode,
881                                ext2_filsys ext2_fs, ext2_ino_t ext2_ino)
882 {
883         int ret = 0;
884         int inline_ea = 0;
885         errcode_t err;
886         u32 datalen;
887         u32 block_size = ext2_fs->blocksize;
888         u32 inode_size = EXT2_INODE_SIZE(ext2_fs->super);
889         struct ext2_inode_large *ext2_inode;
890         struct ext2_ext_attr_entry *entry;
891         void *data;
892         char *buffer = NULL;
893         char inode_buf[EXT2_GOOD_OLD_INODE_SIZE];
894
895         if (inode_size <= EXT2_GOOD_OLD_INODE_SIZE) {
896                 ext2_inode = (struct ext2_inode_large *)inode_buf;
897         } else {
898                 ext2_inode = (struct ext2_inode_large *)malloc(inode_size);
899                 if (!ext2_inode)
900                        return -ENOMEM;
901         }
902         err = ext2fs_read_inode_full(ext2_fs, ext2_ino, (void *)ext2_inode,
903                                      inode_size);
904         if (err) {
905                 fprintf(stderr, "ext2fs_read_inode_full: %s\n",
906                         error_message(err));
907                 ret = -1;
908                 goto out;
909         }
910
911         if (ext2_ino > ext2_fs->super->s_first_ino &&
912             inode_size > EXT2_GOOD_OLD_INODE_SIZE) {
913                 if (EXT2_GOOD_OLD_INODE_SIZE +
914                     ext2_inode->i_extra_isize > inode_size) {
915                         ret = -EIO;
916                         goto out;
917                 }
918                 if (ext2_inode->i_extra_isize != 0 &&
919                     EXT2_XATTR_IHDR(ext2_inode)->h_magic ==
920                     EXT2_EXT_ATTR_MAGIC) {
921                         inline_ea = 1;
922                 }
923         }
924         if (inline_ea) {
925                 int total;
926                 void *end = (void *)ext2_inode + inode_size;
927                 entry = EXT2_XATTR_IFIRST(ext2_inode);
928                 total = end - (void *)entry;
929                 ret = ext2_xattr_check_names(entry, end);
930                 if (ret)
931                         goto out;
932                 while (!EXT2_EXT_IS_LAST_ENTRY(entry)) {
933                         ret = ext2_xattr_check_entry(entry, total);
934                         if (ret)
935                                 goto out;
936                         data = (void *)EXT2_XATTR_IFIRST(ext2_inode) +
937                                 entry->e_value_offs;
938                         datalen = entry->e_value_size;
939                         ret = copy_single_xattr(trans, root, objectid,
940                                                 entry, data, datalen);
941                         if (ret)
942                                 goto out;
943                         entry = EXT2_EXT_ATTR_NEXT(entry);
944                 }
945         }
946
947         if (ext2_inode->i_file_acl == 0)
948                 goto out;
949
950         buffer = malloc(block_size);
951         if (!buffer) {
952                 ret = -ENOMEM;
953                 goto out;
954         }
955         err = ext2fs_read_ext_attr(ext2_fs, ext2_inode->i_file_acl, buffer);
956         if (err) {
957                 fprintf(stderr, "ext2fs_read_ext_attr: %s\n",
958                         error_message(err));
959                 ret = -1;
960                 goto out;
961         }
962         ret = ext2_xattr_check_block(buffer, block_size);
963         if (ret)
964                 goto out;
965
966         entry = EXT2_XATTR_BFIRST(buffer);
967         while (!EXT2_EXT_IS_LAST_ENTRY(entry)) {
968                 ret = ext2_xattr_check_entry(entry, block_size);
969                 if (ret)
970                         goto out;
971                 data = buffer + entry->e_value_offs;
972                 datalen = entry->e_value_size;
973                 ret = copy_single_xattr(trans, root, objectid,
974                                         entry, data, datalen);
975                 if (ret)
976                         goto out;
977                 entry = EXT2_EXT_ATTR_NEXT(entry);
978         }
979 out:
980         if (buffer != NULL)
981                 free(buffer);
982         if ((void *)ext2_inode != inode_buf)
983                 free(ext2_inode);
984         return ret;
985 }
986 #define MINORBITS       20
987 #define MKDEV(ma, mi)   (((ma) << MINORBITS) | (mi))
988
989 static inline dev_t old_decode_dev(u16 val)
990 {
991         return MKDEV((val >> 8) & 255, val & 255);
992 }
993
994 static inline dev_t new_decode_dev(u32 dev)
995 {
996         unsigned major = (dev & 0xfff00) >> 8;
997         unsigned minor = (dev & 0xff) | ((dev >> 12) & 0xfff00);
998         return MKDEV(major, minor);
999 }
1000
1001 static int copy_inode_item(struct btrfs_inode_item *dst,
1002                            struct ext2_inode *src, u32 blocksize)
1003 {
1004         btrfs_set_stack_inode_generation(dst, 1);
1005         btrfs_set_stack_inode_size(dst, src->i_size);
1006         btrfs_set_stack_inode_nbytes(dst, 0);
1007         btrfs_set_stack_inode_block_group(dst, 0);
1008         btrfs_set_stack_inode_nlink(dst, src->i_links_count);
1009         btrfs_set_stack_inode_uid(dst, src->i_uid | (src->i_uid_high << 16));
1010         btrfs_set_stack_inode_gid(dst, src->i_gid | (src->i_gid_high << 16));
1011         btrfs_set_stack_inode_mode(dst, src->i_mode);
1012         btrfs_set_stack_inode_rdev(dst, 0);
1013         btrfs_set_stack_inode_flags(dst, 0);
1014         btrfs_set_stack_timespec_sec(&dst->atime, src->i_atime);
1015         btrfs_set_stack_timespec_nsec(&dst->atime, 0);
1016         btrfs_set_stack_timespec_sec(&dst->ctime, src->i_ctime);
1017         btrfs_set_stack_timespec_nsec(&dst->ctime, 0);
1018         btrfs_set_stack_timespec_sec(&dst->mtime, src->i_mtime);
1019         btrfs_set_stack_timespec_nsec(&dst->mtime, 0);
1020         btrfs_set_stack_timespec_sec(&dst->otime, 0);
1021         btrfs_set_stack_timespec_nsec(&dst->otime, 0);
1022
1023         if (S_ISDIR(src->i_mode)) {
1024                 btrfs_set_stack_inode_size(dst, 0);
1025                 btrfs_set_stack_inode_nlink(dst, 1);
1026         }
1027         if (S_ISREG(src->i_mode)) {
1028                 btrfs_set_stack_inode_size(dst, (u64)src->i_size_high << 32 |
1029                                            (u64)src->i_size);
1030         }
1031         if (!S_ISREG(src->i_mode) && !S_ISDIR(src->i_mode) &&
1032             !S_ISLNK(src->i_mode)) {
1033                 if (src->i_block[0]) {
1034                         btrfs_set_stack_inode_rdev(dst,
1035                                 old_decode_dev(src->i_block[0]));
1036                 } else {
1037                         btrfs_set_stack_inode_rdev(dst,
1038                                 new_decode_dev(src->i_block[1]));
1039                 }
1040         }
1041         return 0;
1042 }
1043
1044 /*
1045  * copy a single inode. do all the required works, such as cloning
1046  * inode item, creating file extents and creating directory entries.
1047  */
1048 static int copy_single_inode(struct btrfs_trans_handle *trans,
1049                              struct btrfs_root *root, u64 objectid,
1050                              ext2_filsys ext2_fs, ext2_ino_t ext2_ino,
1051                              struct ext2_inode *ext2_inode,
1052                              int datacsum, int packing, int noxattr)
1053 {
1054         int ret;
1055         struct btrfs_key inode_key;
1056         struct btrfs_inode_item btrfs_inode;
1057
1058         if (ext2_inode->i_links_count == 0)
1059                 return 0;
1060
1061         copy_inode_item(&btrfs_inode, ext2_inode, ext2_fs->blocksize);
1062         if (!datacsum && S_ISREG(ext2_inode->i_mode)) {
1063                 u32 flags = btrfs_stack_inode_flags(&btrfs_inode) |
1064                             BTRFS_INODE_NODATASUM;
1065                 btrfs_set_stack_inode_flags(&btrfs_inode, flags);
1066         }
1067
1068         switch (ext2_inode->i_mode & S_IFMT) {
1069         case S_IFREG:
1070                 ret = create_file_extents(trans, root, objectid, &btrfs_inode,
1071                                         ext2_fs, ext2_ino, datacsum, packing);
1072                 break;
1073         case S_IFDIR:
1074                 ret = create_dir_entries(trans, root, objectid, &btrfs_inode,
1075                                          ext2_fs, ext2_ino);
1076                 break;
1077         case S_IFLNK:
1078                 ret = create_symbol_link(trans, root, objectid, &btrfs_inode,
1079                                          ext2_fs, ext2_ino, ext2_inode);
1080                 break;
1081         default:
1082                 ret = 0;
1083                 break;
1084         }
1085         if (ret)
1086                 return ret;
1087
1088         if (!noxattr) {
1089                 ret = copy_extended_attrs(trans, root, objectid, &btrfs_inode,
1090                                           ext2_fs, ext2_ino);
1091                 if (ret)
1092                         return ret;
1093         }
1094         inode_key.objectid = objectid;
1095         inode_key.offset = 0;
1096         btrfs_set_key_type(&inode_key, BTRFS_INODE_ITEM_KEY);
1097         ret = btrfs_insert_inode(trans, root, objectid, &btrfs_inode);
1098         return ret;
1099 }
1100
1101 static int copy_disk_extent(struct btrfs_root *root, u64 dst_bytenr,
1102                             u64 src_bytenr, u32 num_bytes)
1103 {
1104         int ret;
1105         char *buffer;
1106         struct btrfs_fs_devices *fs_devs = root->fs_info->fs_devices;
1107
1108         buffer = malloc(num_bytes);
1109         if (!buffer)
1110                 return -ENOMEM;
1111         ret = pread(fs_devs->latest_bdev, buffer, num_bytes, src_bytenr);
1112         if (ret != num_bytes)
1113                 goto fail;
1114         ret = pwrite(fs_devs->latest_bdev, buffer, num_bytes, dst_bytenr);
1115         if (ret != num_bytes)
1116                 goto fail;
1117         ret = 0;
1118 fail:
1119         free(buffer);
1120         if (ret > 0)
1121                 ret = -1;
1122         return ret;
1123 }
1124 /*
1125  * scan ext2's inode bitmap and copy all used inodes.
1126  */
1127 static int copy_inodes(struct btrfs_root *root, ext2_filsys ext2_fs,
1128                        int datacsum, int packing, int noxattr)
1129 {
1130         int ret;
1131         errcode_t err;
1132         ext2_inode_scan ext2_scan;
1133         struct ext2_inode ext2_inode;
1134         ext2_ino_t ext2_ino;
1135         u64 objectid;
1136         struct btrfs_trans_handle *trans;
1137
1138         trans = btrfs_start_transaction(root, 1);
1139         if (!trans)
1140                 return -ENOMEM;
1141         err = ext2fs_open_inode_scan(ext2_fs, 0, &ext2_scan);
1142         if (err) {
1143                 fprintf(stderr, "ext2fs_open_inode_scan: %s\n", error_message(err));
1144                 return -1;
1145         }
1146         while (!(err = ext2fs_get_next_inode(ext2_scan, &ext2_ino,
1147                                              &ext2_inode))) {
1148                 /* no more inodes */
1149                 if (ext2_ino == 0)
1150                         break;
1151                 /* skip special inode in ext2fs */
1152                 if (ext2_ino < EXT2_GOOD_OLD_FIRST_INO &&
1153                     ext2_ino != EXT2_ROOT_INO)
1154                         continue;
1155                 objectid = ext2_ino + INO_OFFSET;
1156                 ret = copy_single_inode(trans, root,
1157                                         objectid, ext2_fs, ext2_ino,
1158                                         &ext2_inode, datacsum, packing,
1159                                         noxattr);
1160                 if (ret)
1161                         return ret;
1162                 if (trans->blocks_used >= 4096) {
1163                         ret = btrfs_commit_transaction(trans, root);
1164                         BUG_ON(ret);
1165                         trans = btrfs_start_transaction(root, 1);
1166                         BUG_ON(!trans);
1167                 }
1168         }
1169         if (err) {
1170                 fprintf(stderr, "ext2fs_get_next_inode: %s\n", error_message(err));
1171                 return -1;
1172         }
1173         ret = btrfs_commit_transaction(trans, root);
1174         BUG_ON(ret);
1175
1176         return ret;
1177 }
1178
1179 /*
1180  * Construct a range of ext2fs image file.
1181  * scan block allocation bitmap, find all blocks used by the ext2fs
1182  * in this range and create file extents that point to these blocks.
1183  *
1184  * Note: Before calling the function, no file extent points to blocks
1185  *       in this range
1186  */
1187 static int create_image_file_range(struct btrfs_trans_handle *trans,
1188                                    struct btrfs_root *root, u64 objectid,
1189                                    struct btrfs_inode_item *inode,
1190                                    u64 start_byte, u64 end_byte,
1191                                    ext2_filsys ext2_fs)
1192 {
1193         u32 blocksize = ext2_fs->blocksize;
1194         u32 block = start_byte / blocksize;
1195         u32 last_block = (end_byte + blocksize - 1) / blocksize;
1196         int ret = 0;
1197         struct blk_iterate_data data = {
1198                 .trans          = trans,
1199                 .root           = root,
1200                 .inode          = inode,
1201                 .objectid       = objectid,
1202                 .first_block    = block,
1203                 .disk_block     = 0,
1204                 .num_blocks     = 0,
1205                 .boundary       = (u64)-1,
1206                 .checksum       = 0,
1207                 .errcode        = 0,
1208         };
1209         for (; start_byte < end_byte; block++, start_byte += blocksize) {
1210                 if (!ext2fs_fast_test_block_bitmap(ext2_fs->block_map, block))
1211                         continue;
1212                 ret = block_iterate_proc(NULL, block, block, &data);
1213                 if (ret & BLOCK_ABORT) {
1214                         ret = data.errcode;
1215                         goto fail;
1216                 }
1217         }
1218         if (data.num_blocks > 0) {
1219                 ret = record_file_blocks(trans, root, objectid, inode,
1220                                          data.first_block, data.disk_block,
1221                                          data.num_blocks, 0);
1222                 if (ret)
1223                         goto fail;
1224                 data.first_block += data.num_blocks;
1225         }
1226         if (last_block > data.first_block) {
1227                 ret = record_file_blocks(trans, root, objectid, inode,
1228                                          data.first_block, 0, last_block -
1229                                          data.first_block, 0);
1230                 if (ret)
1231                         goto fail;
1232         }
1233 fail:
1234         return ret;
1235 }
1236 /*
1237  * Create the ext2fs image file.
1238  */
1239 static int create_ext2_image(struct btrfs_root *root, ext2_filsys ext2_fs,
1240                              const char *name)
1241 {
1242         int ret;
1243         struct btrfs_key key;
1244         struct btrfs_key location;
1245         struct btrfs_path path;
1246         struct btrfs_inode_item btrfs_inode;
1247         struct btrfs_inode_item *inode_item;
1248         struct extent_buffer *leaf;
1249         struct btrfs_fs_info *fs_info = root->fs_info;
1250         struct btrfs_root *extent_root = fs_info->extent_root;
1251         struct btrfs_trans_handle *trans;
1252         struct btrfs_extent_item *ei;
1253         struct btrfs_extent_inline_ref *iref;
1254         struct btrfs_extent_data_ref *dref;
1255         u64 bytenr;
1256         u64 num_bytes;
1257         u64 objectid;
1258         u64 last_byte;
1259         u64 first_free;
1260         u64 total_bytes;
1261         u32 sectorsize = root->sectorsize;
1262
1263         total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
1264         first_free =  BTRFS_SUPER_INFO_OFFSET + sectorsize * 2 - 1;
1265         first_free &= ~((u64)sectorsize - 1);
1266
1267         memset(&btrfs_inode, 0, sizeof(btrfs_inode));
1268         btrfs_set_stack_inode_generation(&btrfs_inode, 1);
1269         btrfs_set_stack_inode_size(&btrfs_inode, total_bytes);
1270         btrfs_set_stack_inode_nlink(&btrfs_inode, 1);
1271         btrfs_set_stack_inode_nbytes(&btrfs_inode, 0);
1272         btrfs_set_stack_inode_mode(&btrfs_inode, S_IFREG | 0400);
1273         btrfs_set_stack_inode_flags(&btrfs_inode, BTRFS_INODE_NODATASUM |
1274                                     BTRFS_INODE_READONLY);
1275         btrfs_init_path(&path);
1276         trans = btrfs_start_transaction(root, 1);
1277         BUG_ON(!trans);
1278
1279         objectid = btrfs_root_dirid(&root->root_item);
1280         ret = btrfs_find_free_objectid(trans, root, objectid, &objectid);
1281         if (ret)
1282                 goto fail;
1283
1284         /*
1285          * copy blocks covered by extent #0 to new positions. extent #0 is
1286          * special, we can't rely on relocate_extents_range to relocate it.
1287          */
1288         for (last_byte = 0; last_byte < first_free; last_byte += sectorsize) {
1289                 ret = custom_alloc_extent(root, sectorsize, 0, &key);
1290                 if (ret)
1291                         goto fail;
1292                 ret = copy_disk_extent(root, key.objectid, last_byte,
1293                                        sectorsize);
1294                 if (ret)
1295                         goto fail;
1296                 ret = record_file_extent(trans, root, objectid,
1297                                          &btrfs_inode, last_byte,
1298                                          key.objectid, sectorsize, 0);
1299                 if (ret)
1300                         goto fail;
1301         }
1302
1303         while(1) {
1304                 key.objectid = last_byte;
1305                 key.offset = 0;
1306                 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1307                 ret = btrfs_search_slot(trans, fs_info->extent_root,
1308                                         &key, &path, 0, 0);
1309                 if (ret < 0)
1310                         goto fail;
1311 next:
1312                 leaf = path.nodes[0];
1313                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1314                         ret = btrfs_next_leaf(extent_root, &path);
1315                         if (ret < 0)
1316                                 goto fail;
1317                         if (ret > 0)
1318                                 break;
1319                         leaf = path.nodes[0];
1320                 }
1321                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1322                 if (last_byte > key.objectid ||
1323                     key.type != BTRFS_EXTENT_ITEM_KEY) {
1324                         path.slots[0]++;
1325                         goto next;
1326                 }
1327
1328                 bytenr = key.objectid;
1329                 num_bytes = key.offset;
1330                 ei = btrfs_item_ptr(leaf, path.slots[0],
1331                                     struct btrfs_extent_item);
1332                 if (!(btrfs_extent_flags(leaf, ei) & BTRFS_EXTENT_FLAG_DATA)) {
1333                         path.slots[0]++;
1334                         goto next;
1335                 }
1336
1337                 BUG_ON(btrfs_item_size_nr(leaf, path.slots[0]) != sizeof(*ei) +
1338                        btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY));
1339
1340                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
1341                 key.type = btrfs_extent_inline_ref_type(leaf, iref);
1342                 BUG_ON(key.type != BTRFS_EXTENT_DATA_REF_KEY);
1343                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1344                 if (btrfs_extent_data_ref_root(leaf, dref) !=
1345                     BTRFS_FS_TREE_OBJECTID) {
1346                         path.slots[0]++;
1347                         goto next;
1348                 }
1349
1350                 if (bytenr > last_byte) {
1351                         ret = create_image_file_range(trans, root, objectid,
1352                                                       &btrfs_inode, last_byte,
1353                                                       bytenr, ext2_fs);
1354                         if (ret)
1355                                 goto fail;
1356                 }
1357                 ret = record_file_extent(trans, root, objectid, &btrfs_inode,
1358                                          bytenr, bytenr, num_bytes, 0);
1359                 if (ret)
1360                         goto fail;
1361                 last_byte = bytenr + num_bytes;
1362                 btrfs_release_path(&path);
1363
1364                 if (trans->blocks_used >= 4096) {
1365                         ret = btrfs_commit_transaction(trans, root);
1366                         BUG_ON(ret);
1367                         trans = btrfs_start_transaction(root, 1);
1368                         BUG_ON(!trans);
1369                 }
1370         }
1371         btrfs_release_path(&path);
1372         if (total_bytes > last_byte) {
1373                 ret = create_image_file_range(trans, root, objectid,
1374                                               &btrfs_inode, last_byte,
1375                                               total_bytes, ext2_fs);
1376                 if (ret)
1377                         goto fail;
1378         }
1379
1380         ret = btrfs_insert_inode(trans, root, objectid, &btrfs_inode);
1381         if (ret)
1382                 goto fail;
1383
1384         location.objectid = objectid;
1385         location.offset = 0;
1386         btrfs_set_key_type(&location, BTRFS_INODE_ITEM_KEY);
1387         ret = btrfs_insert_dir_item(trans, root, name, strlen(name),
1388                                     btrfs_root_dirid(&root->root_item),
1389                                     &location, EXT2_FT_REG_FILE, objectid);
1390         if (ret)
1391                 goto fail;
1392         ret = btrfs_insert_inode_ref(trans, root, name, strlen(name),
1393                                      objectid,
1394                                      btrfs_root_dirid(&root->root_item),
1395                                      objectid);
1396         if (ret)
1397                 goto fail;
1398         location.objectid = btrfs_root_dirid(&root->root_item);
1399         location.offset = 0;
1400         btrfs_set_key_type(&location, BTRFS_INODE_ITEM_KEY);
1401         ret = btrfs_lookup_inode(trans, root, &path, &location, 1);
1402         if (ret)
1403                 goto fail;
1404         leaf = path.nodes[0];
1405         inode_item = btrfs_item_ptr(leaf, path.slots[0],
1406                                     struct btrfs_inode_item);
1407         btrfs_set_inode_size(leaf, inode_item, strlen(name) * 2 +
1408                              btrfs_inode_size(leaf, inode_item));
1409         btrfs_mark_buffer_dirty(leaf);
1410         btrfs_release_path(&path);
1411         ret = btrfs_commit_transaction(trans, root);
1412         BUG_ON(ret);
1413 fail:
1414         btrfs_release_path(&path);
1415         return ret;
1416 }
1417
1418 static struct btrfs_root * link_subvol(struct btrfs_root *root,
1419                 const char *base, u64 root_objectid)
1420 {
1421         struct btrfs_trans_handle *trans;
1422         struct btrfs_fs_info *fs_info = root->fs_info;
1423         struct btrfs_root *tree_root = fs_info->tree_root;
1424         struct btrfs_root *new_root = NULL;
1425         struct btrfs_path *path;
1426         struct btrfs_inode_item *inode_item;
1427         struct extent_buffer *leaf;
1428         struct btrfs_key key;
1429         u64 dirid = btrfs_root_dirid(&root->root_item);
1430         u64 index = 2;
1431         char buf[BTRFS_NAME_LEN + 1]; /* for snprintf null */
1432         int len;
1433         int i;
1434         int ret;
1435
1436         len = strlen(base);
1437         if (len < 1 || len > BTRFS_NAME_LEN)
1438                 return NULL;
1439
1440         path = btrfs_alloc_path();
1441         BUG_ON(!path);
1442
1443         key.objectid = dirid;
1444         key.type = BTRFS_DIR_INDEX_KEY;
1445         key.offset = (u64)-1;
1446
1447         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1448         BUG_ON(ret <= 0);
1449
1450         if (path->slots[0] > 0) {
1451                 path->slots[0]--;
1452                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1453                 if (key.objectid == dirid && key.type == BTRFS_DIR_INDEX_KEY)
1454                         index = key.offset + 1;
1455         }
1456         btrfs_release_path(path);
1457
1458         trans = btrfs_start_transaction(root, 1);
1459         BUG_ON(!trans);
1460
1461         key.objectid = dirid;
1462         key.offset = 0;
1463         key.type =  BTRFS_INODE_ITEM_KEY;
1464
1465         ret = btrfs_lookup_inode(trans, root, path, &key, 1);
1466         BUG_ON(ret);
1467         leaf = path->nodes[0];
1468         inode_item = btrfs_item_ptr(leaf, path->slots[0],
1469                                     struct btrfs_inode_item);
1470
1471         key.objectid = root_objectid;
1472         key.offset = (u64)-1;
1473         key.type = BTRFS_ROOT_ITEM_KEY;
1474
1475         memcpy(buf, base, len);
1476         for (i = 0; i < 1024; i++) {
1477                 ret = btrfs_insert_dir_item(trans, root, buf, len,
1478                                             dirid, &key, BTRFS_FT_DIR, index);
1479                 if (ret != -EEXIST)
1480                         break;
1481                 len = snprintf(buf, ARRAY_SIZE(buf), "%s%d", base, i);
1482                 if (len < 1 || len > BTRFS_NAME_LEN) {
1483                         ret = -EINVAL;
1484                         break;
1485                 }
1486         }
1487         if (ret)
1488                 goto fail;
1489
1490         btrfs_set_inode_size(leaf, inode_item, len * 2 +
1491                              btrfs_inode_size(leaf, inode_item));
1492         btrfs_mark_buffer_dirty(leaf);
1493         btrfs_release_path(path);
1494
1495         /* add the backref first */
1496         ret = btrfs_add_root_ref(trans, tree_root, root_objectid,
1497                                  BTRFS_ROOT_BACKREF_KEY,
1498                                  root->root_key.objectid,
1499                                  dirid, index, buf, len);
1500         BUG_ON(ret);
1501
1502         /* now add the forward ref */
1503         ret = btrfs_add_root_ref(trans, tree_root, root->root_key.objectid,
1504                                  BTRFS_ROOT_REF_KEY, root_objectid,
1505                                  dirid, index, buf, len);
1506
1507         ret = btrfs_commit_transaction(trans, root);
1508         BUG_ON(ret);
1509
1510         new_root = btrfs_read_fs_root(fs_info, &key);
1511         if (IS_ERR(new_root))
1512                 new_root = NULL;
1513 fail:
1514         btrfs_free_path(path);
1515         return new_root;
1516 }
1517
1518 static int create_chunk_mapping(struct btrfs_trans_handle *trans,
1519                                 struct btrfs_root *root)
1520 {
1521         struct btrfs_fs_info *info = root->fs_info;
1522         struct btrfs_root *chunk_root = info->chunk_root;
1523         struct btrfs_root *extent_root = info->extent_root;
1524         struct btrfs_device *device;
1525         struct btrfs_block_group_cache *cache;
1526         struct btrfs_dev_extent *extent;
1527         struct extent_buffer *leaf;
1528         struct btrfs_chunk chunk;
1529         struct btrfs_key key;
1530         struct btrfs_path path;
1531         u64 cur_start;
1532         u64 total_bytes;
1533         u64 chunk_objectid;
1534         int ret;
1535
1536         btrfs_init_path(&path);
1537
1538         total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
1539         chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
1540
1541         BUG_ON(list_empty(&info->fs_devices->devices));
1542         device = list_entry(info->fs_devices->devices.next,
1543                             struct btrfs_device, dev_list);
1544         BUG_ON(device->devid != info->fs_devices->latest_devid);
1545
1546         /* delete device extent created by make_btrfs */
1547         key.objectid = device->devid;
1548         key.offset = 0;
1549         key.type = BTRFS_DEV_EXTENT_KEY;
1550         ret = btrfs_search_slot(trans, device->dev_root, &key, &path, -1, 1);
1551         if (ret < 0)
1552                 goto err;
1553
1554         BUG_ON(ret > 0);
1555         ret = btrfs_del_item(trans, device->dev_root, &path);
1556         if (ret)
1557                 goto err;
1558         btrfs_release_path(&path);
1559
1560         /* delete chunk item created by make_btrfs */
1561         key.objectid = chunk_objectid;
1562         key.offset = 0;
1563         key.type = BTRFS_CHUNK_ITEM_KEY;
1564         ret = btrfs_search_slot(trans, chunk_root, &key, &path, -1, 1);
1565         if (ret < 0)
1566                 goto err;
1567
1568         BUG_ON(ret > 0);
1569         ret = btrfs_del_item(trans, chunk_root, &path);
1570         if (ret)
1571                 goto err;
1572         btrfs_release_path(&path);
1573
1574         /* for each block group, create device extent and chunk item */
1575         cur_start = 0;
1576         while (cur_start < total_bytes) {
1577                 cache = btrfs_lookup_block_group(root->fs_info, cur_start);
1578                 BUG_ON(!cache);
1579
1580                 /* insert device extent */
1581                 key.objectid = device->devid;
1582                 key.offset = cache->key.objectid;
1583                 key.type = BTRFS_DEV_EXTENT_KEY;
1584                 ret = btrfs_insert_empty_item(trans, device->dev_root, &path,
1585                                               &key, sizeof(*extent));
1586                 if (ret)
1587                         goto err;
1588
1589                 leaf = path.nodes[0];
1590                 extent = btrfs_item_ptr(leaf, path.slots[0],
1591                                         struct btrfs_dev_extent);
1592
1593                 btrfs_set_dev_extent_chunk_tree(leaf, extent,
1594                                                 chunk_root->root_key.objectid);
1595                 btrfs_set_dev_extent_chunk_objectid(leaf, extent,
1596                                                     chunk_objectid);
1597                 btrfs_set_dev_extent_chunk_offset(leaf, extent,
1598                                                   cache->key.objectid);
1599                 btrfs_set_dev_extent_length(leaf, extent, cache->key.offset);
1600                 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
1601                     (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
1602                     BTRFS_UUID_SIZE);
1603                 btrfs_mark_buffer_dirty(leaf);
1604                 btrfs_release_path(&path);
1605
1606                 /* insert chunk item */
1607                 btrfs_set_stack_chunk_length(&chunk, cache->key.offset);
1608                 btrfs_set_stack_chunk_owner(&chunk,
1609                                             extent_root->root_key.objectid);
1610                 btrfs_set_stack_chunk_stripe_len(&chunk, STRIPE_LEN);
1611                 btrfs_set_stack_chunk_type(&chunk, cache->flags);
1612                 btrfs_set_stack_chunk_io_align(&chunk, device->io_align);
1613                 btrfs_set_stack_chunk_io_width(&chunk, device->io_width);
1614                 btrfs_set_stack_chunk_sector_size(&chunk, device->sector_size);
1615                 btrfs_set_stack_chunk_num_stripes(&chunk, 1);
1616                 btrfs_set_stack_chunk_sub_stripes(&chunk, 0);
1617                 btrfs_set_stack_stripe_devid(&chunk.stripe, device->devid);
1618                 btrfs_set_stack_stripe_offset(&chunk.stripe,
1619                                               cache->key.objectid);
1620                 memcpy(&chunk.stripe.dev_uuid, device->uuid, BTRFS_UUID_SIZE);
1621
1622                 key.objectid = chunk_objectid;
1623                 key.offset = cache->key.objectid;
1624                 key.type = BTRFS_CHUNK_ITEM_KEY;
1625
1626                 ret = btrfs_insert_item(trans, chunk_root, &key, &chunk,
1627                                         btrfs_chunk_item_size(1));
1628                 if (ret)
1629                         goto err;
1630
1631                 cur_start = cache->key.objectid + cache->key.offset;
1632         }
1633
1634         device->bytes_used = total_bytes;
1635         ret = btrfs_update_device(trans, device);
1636 err:
1637         btrfs_release_path(&path);
1638         return ret;
1639 }
1640
1641 static int create_subvol(struct btrfs_trans_handle *trans,
1642                          struct btrfs_root *root, u64 root_objectid)
1643 {
1644         struct extent_buffer *tmp;
1645         struct btrfs_root *new_root;
1646         struct btrfs_key key;
1647         struct btrfs_root_item root_item;
1648         int ret;
1649
1650         ret = btrfs_copy_root(trans, root, root->node, &tmp,
1651                               root_objectid);
1652         BUG_ON(ret);
1653
1654         memcpy(&root_item, &root->root_item, sizeof(root_item));
1655         btrfs_set_root_bytenr(&root_item, tmp->start);
1656         btrfs_set_root_level(&root_item, btrfs_header_level(tmp));
1657         btrfs_set_root_generation(&root_item, trans->transid);
1658         free_extent_buffer(tmp);
1659
1660         key.objectid = root_objectid;
1661         key.type = BTRFS_ROOT_ITEM_KEY;
1662         key.offset = trans->transid;
1663         ret = btrfs_insert_root(trans, root->fs_info->tree_root,
1664                                 &key, &root_item);
1665
1666         key.offset = (u64)-1;
1667         new_root = btrfs_read_fs_root(root->fs_info, &key);
1668         BUG_ON(!new_root || IS_ERR(new_root));
1669
1670         ret = btrfs_make_root_dir(trans, new_root, BTRFS_FIRST_FREE_OBJECTID);
1671         BUG_ON(ret);
1672
1673         return 0;
1674 }
1675
1676 static int init_btrfs(struct btrfs_root *root)
1677 {
1678         int ret;
1679         struct btrfs_key location;
1680         struct btrfs_trans_handle *trans;
1681         struct btrfs_fs_info *fs_info = root->fs_info;
1682         struct extent_buffer *tmp;
1683
1684         trans = btrfs_start_transaction(root, 1);
1685         BUG_ON(!trans);
1686         ret = btrfs_make_block_groups(trans, root);
1687         if (ret)
1688                 goto err;
1689         ret = btrfs_fix_block_accounting(trans, root);
1690         if (ret)
1691                 goto err;
1692         ret = create_chunk_mapping(trans, root);
1693         if (ret)
1694                 goto err;
1695         ret = btrfs_make_root_dir(trans, fs_info->tree_root,
1696                                   BTRFS_ROOT_TREE_DIR_OBJECTID);
1697         if (ret)
1698                 goto err;
1699         memcpy(&location, &root->root_key, sizeof(location));
1700         location.offset = (u64)-1;
1701         ret = btrfs_insert_dir_item(trans, fs_info->tree_root, "default", 7,
1702                                 btrfs_super_root_dir(fs_info->super_copy),
1703                                 &location, BTRFS_FT_DIR, 0);
1704         if (ret)
1705                 goto err;
1706         ret = btrfs_insert_inode_ref(trans, fs_info->tree_root, "default", 7,
1707                                 location.objectid,
1708                                 btrfs_super_root_dir(fs_info->super_copy), 0);
1709         if (ret)
1710                 goto err;
1711         btrfs_set_root_dirid(&fs_info->fs_root->root_item,
1712                              BTRFS_FIRST_FREE_OBJECTID);
1713
1714         /* subvol for ext2 image file */
1715         ret = create_subvol(trans, root, EXT2_IMAGE_SUBVOL_OBJECTID);
1716         BUG_ON(ret);
1717         /* subvol for data relocation */
1718         ret = create_subvol(trans, root, BTRFS_DATA_RELOC_TREE_OBJECTID);
1719         BUG_ON(ret);
1720
1721         ret = __btrfs_cow_block(trans, fs_info->csum_root,
1722                                 fs_info->csum_root->node, NULL, 0, &tmp, 0, 0);
1723         BUG_ON(ret);
1724         free_extent_buffer(tmp);
1725
1726         ret = btrfs_commit_transaction(trans, root);
1727         BUG_ON(ret);
1728 err:
1729         return ret;
1730 }
1731
1732 /*
1733  * Migrate super block to it's default position and zero 0 ~ 16k
1734  */
1735 static int migrate_super_block(int fd, u64 old_bytenr, u32 sectorsize)
1736 {
1737         int ret;
1738         struct extent_buffer *buf;
1739         struct btrfs_super_block *super;
1740         u32 len;
1741         u32 bytenr;
1742
1743         BUG_ON(sectorsize < sizeof(*super));
1744         buf = malloc(sizeof(*buf) + sectorsize);
1745         if (!buf)
1746                 return -ENOMEM;
1747
1748         buf->len = sectorsize;
1749         ret = pread(fd, buf->data, sectorsize, old_bytenr);
1750         if (ret != sectorsize)
1751                 goto fail;
1752
1753         super = (struct btrfs_super_block *)buf->data;
1754         BUG_ON(btrfs_super_bytenr(super) != old_bytenr);
1755         btrfs_set_super_bytenr(super, BTRFS_SUPER_INFO_OFFSET);
1756
1757         csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
1758         ret = pwrite(fd, buf->data, sectorsize, BTRFS_SUPER_INFO_OFFSET);
1759         if (ret != sectorsize)
1760                 goto fail;
1761
1762         ret = fsync(fd);
1763         if (ret)
1764                 goto fail;
1765
1766         memset(buf->data, 0, sectorsize);
1767         for (bytenr = 0; bytenr < BTRFS_SUPER_INFO_OFFSET; ) {
1768                 len = BTRFS_SUPER_INFO_OFFSET - bytenr;
1769                 if (len > sectorsize)
1770                         len = sectorsize;
1771                 ret = pwrite(fd, buf->data, len, bytenr);
1772                 if (ret != len) {
1773                         fprintf(stderr, "unable to zero fill device\n");
1774                         break;
1775                 }
1776                 bytenr += len;
1777         }
1778         ret = 0;
1779         fsync(fd);
1780 fail:
1781         free(buf);
1782         if (ret > 0)
1783                 ret = -1;
1784         return ret;
1785 }
1786
1787 static int prepare_system_chunk_sb(struct btrfs_super_block *super)
1788 {
1789         struct btrfs_chunk *chunk;
1790         struct btrfs_disk_key *key;
1791         u32 sectorsize = btrfs_super_sectorsize(super);
1792
1793         key = (struct btrfs_disk_key *)(super->sys_chunk_array);
1794         chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
1795                                        sizeof(struct btrfs_disk_key));
1796
1797         btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
1798         btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
1799         btrfs_set_disk_key_offset(key, 0);
1800
1801         btrfs_set_stack_chunk_length(chunk, btrfs_super_total_bytes(super));
1802         btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
1803         btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024);
1804         btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
1805         btrfs_set_stack_chunk_io_align(chunk, sectorsize);
1806         btrfs_set_stack_chunk_io_width(chunk, sectorsize);
1807         btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
1808         btrfs_set_stack_chunk_num_stripes(chunk, 1);
1809         btrfs_set_stack_chunk_sub_stripes(chunk, 0);
1810         chunk->stripe.devid = super->dev_item.devid;
1811         btrfs_set_stack_stripe_offset(&chunk->stripe, 0);
1812         memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
1813         btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
1814         return 0;
1815 }
1816
1817 static int prepare_system_chunk(int fd, u64 sb_bytenr, u32 sectorsize)
1818 {
1819         int ret;
1820         struct extent_buffer *buf;
1821         struct btrfs_super_block *super;
1822
1823         BUG_ON(sectorsize < sizeof(*super));
1824         buf = malloc(sizeof(*buf) + sectorsize);
1825         if (!buf)
1826                 return -ENOMEM;
1827
1828         buf->len = sectorsize;
1829         ret = pread(fd, buf->data, sectorsize, sb_bytenr);
1830         if (ret != sectorsize)
1831                 goto fail;
1832
1833         super = (struct btrfs_super_block *)buf->data;
1834         BUG_ON(btrfs_super_bytenr(super) != sb_bytenr);
1835         BUG_ON(btrfs_super_num_devices(super) != 1);
1836
1837         ret = prepare_system_chunk_sb(super);
1838         if (ret)
1839                 goto fail;
1840
1841         csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
1842         ret = pwrite(fd, buf->data, sectorsize, sb_bytenr);
1843         if (ret != sectorsize)
1844                 goto fail;
1845
1846         ret = 0;
1847 fail:
1848         free(buf);
1849         if (ret > 0)
1850                 ret = -1;
1851         return ret;
1852 }
1853
1854 static int relocate_one_reference(struct btrfs_trans_handle *trans,
1855                                   struct btrfs_root *root,
1856                                   u64 extent_start, u64 extent_size,
1857                                   struct btrfs_key *extent_key,
1858                                   struct extent_io_tree *reloc_tree)
1859 {
1860         struct extent_buffer *leaf;
1861         struct btrfs_file_extent_item *fi;
1862         struct btrfs_key key;
1863         struct btrfs_path path;
1864         struct btrfs_inode_item inode;
1865         struct blk_iterate_data data;
1866         u64 bytenr;
1867         u64 num_bytes;
1868         u64 cur_offset;
1869         u64 new_pos;
1870         u64 nbytes;
1871         u64 sector_end;
1872         u32 sectorsize = root->sectorsize;
1873         unsigned long ptr;
1874         int datacsum;
1875         int fd;
1876         int ret;
1877
1878         btrfs_init_path(&path);
1879         ret = btrfs_search_slot(trans, root, extent_key, &path, -1, 1);
1880         if (ret)
1881                 goto fail;
1882
1883         leaf = path.nodes[0];
1884         fi = btrfs_item_ptr(leaf, path.slots[0],
1885                             struct btrfs_file_extent_item);
1886         BUG_ON(btrfs_file_extent_offset(leaf, fi) > 0);
1887         if (extent_start != btrfs_file_extent_disk_bytenr(leaf, fi) ||
1888             extent_size != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
1889                 ret = 1;
1890                 goto fail;
1891         }
1892
1893         bytenr = extent_start + btrfs_file_extent_offset(leaf, fi);
1894         num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
1895
1896         ret = btrfs_del_item(trans, root, &path);
1897         if (ret)
1898                 goto fail;
1899
1900         ret = btrfs_free_extent(trans, root, extent_start, extent_size, 0,
1901                                 root->root_key.objectid,
1902                                 extent_key->objectid, extent_key->offset);
1903         if (ret)
1904                 goto fail;
1905
1906         btrfs_release_path(&path);
1907
1908         key.objectid = extent_key->objectid;
1909         key.offset = 0;
1910         key.type =  BTRFS_INODE_ITEM_KEY;
1911         ret = btrfs_lookup_inode(trans, root, &path, &key, 0);
1912         if (ret)
1913                 goto fail;
1914
1915         leaf = path.nodes[0];
1916         ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
1917         read_extent_buffer(leaf, &inode, ptr, sizeof(inode));
1918         btrfs_release_path(&path);
1919
1920         BUG_ON(num_bytes & (sectorsize - 1));
1921         nbytes = btrfs_stack_inode_nbytes(&inode) - num_bytes;
1922         btrfs_set_stack_inode_nbytes(&inode, nbytes);
1923         datacsum = !(btrfs_stack_inode_flags(&inode) & BTRFS_INODE_NODATASUM);
1924
1925         data = (struct blk_iterate_data) {
1926                 .trans          = trans,
1927                 .root           = root,
1928                 .inode          = &inode,
1929                 .objectid       = extent_key->objectid,
1930                 .first_block    = extent_key->offset / sectorsize,
1931                 .disk_block     = 0,
1932                 .num_blocks     = 0,
1933                 .boundary       = (u64)-1,
1934                 .checksum       = datacsum,
1935                 .errcode        = 0,
1936         };
1937
1938         cur_offset = extent_key->offset;
1939         while (num_bytes > 0) {
1940                 sector_end = bytenr + sectorsize - 1;
1941                 if (test_range_bit(reloc_tree, bytenr, sector_end,
1942                                    EXTENT_LOCKED, 1)) {
1943                         ret = get_state_private(reloc_tree, bytenr, &new_pos);
1944                         BUG_ON(ret);
1945                 } else {
1946                         ret = custom_alloc_extent(root, sectorsize, 0, &key);
1947                         if (ret)
1948                                 goto fail;
1949                         new_pos = key.objectid;
1950
1951                         if (cur_offset == extent_key->offset) {
1952                                 fd = root->fs_info->fs_devices->latest_bdev;
1953                                 readahead(fd, bytenr, num_bytes);
1954                         }
1955                         ret = copy_disk_extent(root, new_pos, bytenr,
1956                                                sectorsize);
1957                         if (ret)
1958                                 goto fail;
1959                         ret = set_extent_bits(reloc_tree, bytenr, sector_end,
1960                                               EXTENT_LOCKED, GFP_NOFS);
1961                         BUG_ON(ret);
1962                         ret = set_state_private(reloc_tree, bytenr, new_pos);
1963                         BUG_ON(ret);
1964                 }
1965
1966                 ret = block_iterate_proc(NULL, new_pos / sectorsize,
1967                                          cur_offset / sectorsize, &data);
1968                 if (ret & BLOCK_ABORT) {
1969                         ret = data.errcode;
1970                         goto fail;
1971                 }
1972
1973                 cur_offset += sectorsize;
1974                 bytenr += sectorsize;
1975                 num_bytes -= sectorsize;
1976         }
1977
1978         if (data.num_blocks > 0) {
1979                 ret = record_file_blocks(trans, root,
1980                                          extent_key->objectid, &inode,
1981                                          data.first_block, data.disk_block,
1982                                          data.num_blocks, datacsum);
1983                 if (ret)
1984                         goto fail;
1985         }
1986
1987         key.objectid = extent_key->objectid;
1988         key.offset = 0;
1989         key.type =  BTRFS_INODE_ITEM_KEY;
1990         ret = btrfs_lookup_inode(trans, root, &path, &key, 1);
1991         if (ret)
1992                 goto fail;
1993
1994         leaf = path.nodes[0];
1995         ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
1996         write_extent_buffer(leaf, &inode, ptr, sizeof(inode));
1997         btrfs_mark_buffer_dirty(leaf);
1998         btrfs_release_path(&path);
1999
2000 fail:
2001         btrfs_release_path(&path);
2002         return ret;
2003 }
2004
2005 static int relocate_extents_range(struct btrfs_root *fs_root,
2006                                   struct btrfs_root *ext2_root,
2007                                   u64 start_byte, u64 end_byte)
2008 {
2009         struct btrfs_fs_info *info = fs_root->fs_info;
2010         struct btrfs_root *extent_root = info->extent_root;
2011         struct btrfs_root *cur_root = NULL;
2012         struct btrfs_trans_handle *trans;
2013         struct btrfs_extent_data_ref *dref;
2014         struct btrfs_extent_inline_ref *iref;
2015         struct btrfs_extent_item *ei;
2016         struct extent_buffer *leaf;
2017         struct btrfs_key key;
2018         struct btrfs_key extent_key;
2019         struct btrfs_path path;
2020         struct extent_io_tree reloc_tree;
2021         unsigned long ptr;
2022         unsigned long end;
2023         u64 cur_byte;
2024         u64 num_bytes;
2025         u64 ref_root;
2026         u64 num_extents;
2027         int pass = 0;
2028         int ret;
2029
2030         btrfs_init_path(&path);
2031         extent_io_tree_init(&reloc_tree);
2032
2033         key.objectid = start_byte;
2034         key.offset = 0;
2035         key.type = BTRFS_EXTENT_ITEM_KEY;
2036         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2037         if (ret < 0)
2038                 goto fail;
2039         if (ret > 0) {
2040                 ret = btrfs_previous_item(extent_root, &path, 0,
2041                                           BTRFS_EXTENT_ITEM_KEY);
2042                 if (ret < 0)
2043                         goto fail;
2044                 if (ret == 0) {
2045                         leaf = path.nodes[0];
2046                         btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2047                         if (key.objectid + key.offset > start_byte)
2048                                 start_byte = key.objectid;
2049                 }
2050         }
2051         btrfs_release_path(&path);
2052 again:
2053         cur_root = (pass % 2 == 0) ? ext2_root : fs_root;
2054         num_extents = 0;
2055
2056         trans = btrfs_start_transaction(cur_root, 1);
2057         BUG_ON(!trans);
2058
2059         cur_byte = start_byte;
2060         while (1) {
2061                 key.objectid = cur_byte;
2062                 key.offset = 0;
2063                 key.type = BTRFS_EXTENT_ITEM_KEY;
2064                 ret = btrfs_search_slot(trans, extent_root,
2065                                         &key, &path, 0, 0);
2066                 if (ret < 0)
2067                         goto fail;
2068 next:
2069                 leaf = path.nodes[0];
2070                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2071                         ret = btrfs_next_leaf(extent_root, &path);
2072                         if (ret < 0)
2073                                 goto fail;
2074                         if (ret > 0)
2075                                 break;
2076                         leaf = path.nodes[0];
2077                 }
2078
2079                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2080                 if (key.objectid < cur_byte ||
2081                     key.type != BTRFS_EXTENT_ITEM_KEY) {
2082                         path.slots[0]++;
2083                         goto next;
2084                 }
2085                 if (key.objectid >= end_byte)
2086                         break;
2087
2088                 num_extents++;
2089
2090                 cur_byte = key.objectid;
2091                 num_bytes = key.offset;
2092                 ei = btrfs_item_ptr(leaf, path.slots[0],
2093                                     struct btrfs_extent_item);
2094                 BUG_ON(!(btrfs_extent_flags(leaf, ei) &
2095                          BTRFS_EXTENT_FLAG_DATA));
2096
2097                 ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
2098                 end = ptr + btrfs_item_size_nr(leaf, path.slots[0]);
2099
2100                 ptr += sizeof(struct btrfs_extent_item);
2101
2102                 while (ptr < end) {
2103                         iref = (struct btrfs_extent_inline_ref *)ptr;
2104                         key.type = btrfs_extent_inline_ref_type(leaf, iref);
2105                         BUG_ON(key.type != BTRFS_EXTENT_DATA_REF_KEY);
2106                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
2107                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
2108                         extent_key.objectid =
2109                                 btrfs_extent_data_ref_objectid(leaf, dref);
2110                         extent_key.offset =
2111                                 btrfs_extent_data_ref_offset(leaf, dref);
2112                         extent_key.type = BTRFS_EXTENT_DATA_KEY;
2113                         BUG_ON(btrfs_extent_data_ref_count(leaf, dref) != 1);
2114
2115                         if (ref_root == cur_root->root_key.objectid)
2116                                 break;
2117
2118                         ptr += btrfs_extent_inline_ref_size(key.type);
2119                 }
2120
2121                 if (ptr >= end) {
2122                         path.slots[0]++;
2123                         goto next;
2124                 }
2125
2126                 ret = relocate_one_reference(trans, cur_root, cur_byte,
2127                                              num_bytes, &extent_key,
2128                                              &reloc_tree);
2129                 if (ret < 0)
2130                         goto fail;
2131
2132                 cur_byte += num_bytes;
2133                 btrfs_release_path(&path);
2134
2135                 if (trans->blocks_used >= 4096) {
2136                         ret = btrfs_commit_transaction(trans, cur_root);
2137                         BUG_ON(ret);
2138                         trans = btrfs_start_transaction(cur_root, 1);
2139                         BUG_ON(!trans);
2140                 }
2141         }
2142         btrfs_release_path(&path);
2143
2144         ret = btrfs_commit_transaction(trans, cur_root);
2145         BUG_ON(ret);
2146
2147         if (num_extents > 0 && pass++ < 16)
2148                 goto again;
2149
2150         ret = (num_extents > 0) ? -1 : 0;
2151 fail:
2152         btrfs_release_path(&path);
2153         extent_io_tree_cleanup(&reloc_tree);
2154         return ret;
2155 }
2156
2157 /*
2158  * relocate data in system chunk
2159  */
2160 static int cleanup_sys_chunk(struct btrfs_root *fs_root,
2161                              struct btrfs_root *ext2_root)
2162 {
2163         struct btrfs_block_group_cache *cache;
2164         int i, ret = 0;
2165         u64 offset = 0;
2166         u64 end_byte;
2167
2168         while(1) {
2169                 cache = btrfs_lookup_block_group(fs_root->fs_info, offset);
2170                 if (!cache)
2171                         break;
2172
2173                 end_byte = cache->key.objectid + cache->key.offset;
2174                 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
2175                         ret = relocate_extents_range(fs_root, ext2_root,
2176                                                      cache->key.objectid,
2177                                                      end_byte);
2178                         if (ret)
2179                                 goto fail;
2180                 }
2181                 offset = end_byte;
2182         }
2183         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2184                 offset = btrfs_sb_offset(i);
2185                 offset &= ~((u64)STRIPE_LEN - 1);
2186
2187                 ret = relocate_extents_range(fs_root, ext2_root,
2188                                              offset, offset + STRIPE_LEN);
2189                 if (ret)
2190                         goto fail;
2191         }
2192         ret = 0;
2193 fail:
2194         return ret;
2195 }
2196
2197 static int fixup_chunk_mapping(struct btrfs_root *root)
2198 {
2199         struct btrfs_trans_handle *trans;
2200         struct btrfs_fs_info *info = root->fs_info;
2201         struct btrfs_root *chunk_root = info->chunk_root;
2202         struct extent_buffer *leaf;
2203         struct btrfs_key key;
2204         struct btrfs_path path;
2205         struct btrfs_chunk chunk;
2206         unsigned long ptr;
2207         u32 size;
2208         u64 type;
2209         int ret;
2210
2211         btrfs_init_path(&path);
2212
2213         trans = btrfs_start_transaction(root, 1);
2214         BUG_ON(!trans);
2215
2216         /*
2217          * recow the whole chunk tree. this will move all chunk tree blocks
2218          * into system block group.
2219          */
2220         memset(&key, 0, sizeof(key));
2221         while (1) {
2222                 ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 1);
2223                 if (ret < 0)
2224                         goto err;
2225
2226                 ret = btrfs_next_leaf(chunk_root, &path);
2227                 if (ret < 0)
2228                         goto err;
2229                 if (ret > 0)
2230                         break;
2231
2232                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
2233                 btrfs_release_path(&path);
2234         }
2235         btrfs_release_path(&path);
2236
2237         /* fixup the system chunk array in super block */
2238         btrfs_set_super_sys_array_size(info->super_copy, 0);
2239
2240         key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
2241         key.offset = 0;
2242         key.type = BTRFS_CHUNK_ITEM_KEY;
2243
2244         ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 0);
2245         if (ret < 0)
2246                 goto err;
2247         BUG_ON(ret != 0);
2248         while(1) {
2249                 leaf = path.nodes[0];
2250                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2251                         ret = btrfs_next_leaf(chunk_root, &path);
2252                         if (ret < 0)
2253                                 goto err;
2254                         if (ret > 0)
2255                                 break;
2256                         leaf = path.nodes[0];
2257                 }
2258                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2259                 if (key.type != BTRFS_CHUNK_ITEM_KEY)
2260                         goto next;
2261
2262                 ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
2263                 size = btrfs_item_size_nr(leaf, path.slots[0]);
2264                 BUG_ON(size != sizeof(chunk));
2265                 read_extent_buffer(leaf, &chunk, ptr, size);
2266                 type = btrfs_stack_chunk_type(&chunk);
2267
2268                 if (!(type & BTRFS_BLOCK_GROUP_SYSTEM))
2269                         goto next;
2270
2271                 ret = btrfs_add_system_chunk(trans, chunk_root, &key,
2272                                              &chunk, size);
2273                 if (ret)
2274                         goto err;
2275 next:
2276                 path.slots[0]++;
2277         }
2278
2279         ret = btrfs_commit_transaction(trans, root);
2280         BUG_ON(ret);
2281 err:
2282         btrfs_release_path(&path);
2283         return ret;
2284 }
2285
2286 static int do_convert(const char *devname, int datacsum, int packing,
2287                 int noxattr)
2288 {
2289         int i, ret;
2290         int fd = -1;
2291         u32 blocksize;
2292         u64 blocks[7];
2293         u64 total_bytes;
2294         u64 super_bytenr;
2295         ext2_filsys ext2_fs;
2296         struct btrfs_root *root;
2297         struct btrfs_root *ext2_root;
2298
2299         ret = open_ext2fs(devname, &ext2_fs);
2300         if (ret) {
2301                 fprintf(stderr, "unable to open the Ext2fs\n");
2302                 goto fail;
2303         }
2304         blocksize = ext2_fs->blocksize;
2305         total_bytes = (u64)ext2_fs->super->s_blocks_count * blocksize;
2306         if (blocksize < 4096) {
2307                 fprintf(stderr, "block size is too small\n");
2308                 goto fail;
2309         }
2310         if (!(ext2_fs->super->s_feature_incompat &
2311               EXT2_FEATURE_INCOMPAT_FILETYPE)) {
2312                 fprintf(stderr, "filetype feature is missing\n");
2313                 goto fail;
2314         }
2315         for (i = 0; i < 7; i++) {
2316                 ret = ext2_alloc_block(ext2_fs, 0, blocks + i);
2317                 if (ret) {
2318                         fprintf(stderr, "not enough free space\n");
2319                         goto fail;
2320                 }
2321                 blocks[i] *= blocksize;
2322         }
2323         super_bytenr = blocks[0];
2324         fd = open(devname, O_RDWR);
2325         if (fd < 0) {
2326                 fprintf(stderr, "unable to open %s\n", devname);
2327                 goto fail;
2328         }
2329         ret = make_btrfs(fd, devname, ext2_fs->super->s_volume_name,
2330                          blocks, total_bytes, blocksize, blocksize,
2331                          blocksize, blocksize, 0);
2332         if (ret) {
2333                 fprintf(stderr, "unable to create initial ctree: %s\n",
2334                         strerror(-ret));
2335                 goto fail;
2336         }
2337         /* create a system chunk that maps the whole device */
2338         ret = prepare_system_chunk(fd, super_bytenr, blocksize);
2339         if (ret) {
2340                 fprintf(stderr, "unable to update system chunk\n");
2341                 goto fail;
2342         }
2343         root = open_ctree_fd(fd, devname, super_bytenr, O_RDWR);
2344         if (!root) {
2345                 fprintf(stderr, "unable to open ctree\n");
2346                 goto fail;
2347         }
2348         ret = cache_free_extents(root, ext2_fs);
2349         if (ret) {
2350                 fprintf(stderr, "error during cache_free_extents %d\n", ret);
2351                 goto fail;
2352         }
2353         root->fs_info->extent_ops = &extent_ops;
2354         /* recover block allocation bitmap */
2355         for (i = 0; i < 7; i++) {
2356                 blocks[i] /= blocksize;
2357                 ext2_free_block(ext2_fs, blocks[i]);
2358         }
2359         ret = init_btrfs(root);
2360         if (ret) {
2361                 fprintf(stderr, "unable to setup the root tree\n");
2362                 goto fail;
2363         }
2364         printf("creating btrfs metadata.\n");
2365         ret = copy_inodes(root, ext2_fs, datacsum, packing, noxattr);
2366         if (ret) {
2367                 fprintf(stderr, "error during copy_inodes %d\n", ret);
2368                 goto fail;
2369         }
2370         printf("creating ext2fs image file.\n");
2371         ext2_root = link_subvol(root, "ext2_saved", EXT2_IMAGE_SUBVOL_OBJECTID);
2372         if (!ext2_root) {
2373                 fprintf(stderr, "unable to create subvol\n");
2374                 goto fail;
2375         }
2376         ret = create_ext2_image(ext2_root, ext2_fs, "image");
2377         if (ret) {
2378                 fprintf(stderr, "error during create_ext2_image %d\n", ret);
2379                 goto fail;
2380         }
2381         printf("cleaning up system chunk.\n");
2382         ret = cleanup_sys_chunk(root, ext2_root);
2383         if (ret) {
2384                 fprintf(stderr, "error during cleanup_sys_chunk %d\n", ret);
2385                 goto fail;
2386         }
2387         ret = close_ctree(root);
2388         if (ret) {
2389                 fprintf(stderr, "error during close_ctree %d\n", ret);
2390                 goto fail;
2391         }
2392         close_ext2fs(ext2_fs);
2393
2394         /*
2395          * If this step succeed, we get a mountable btrfs. Otherwise
2396          * the ext2fs is left unchanged.
2397          */
2398         ret = migrate_super_block(fd, super_bytenr, blocksize);
2399         if (ret) {
2400                 fprintf(stderr, "unable to migrate super block\n");
2401                 goto fail;
2402         }
2403
2404         root = open_ctree_fd(fd, devname, 0, O_RDWR);
2405         if (!root) {
2406                 fprintf(stderr, "unable to open ctree\n");
2407                 goto fail;
2408         }
2409         /* move chunk tree into system chunk. */
2410         ret = fixup_chunk_mapping(root);
2411         if (ret) {
2412                 fprintf(stderr, "error during fixup_chunk_tree\n");
2413                 goto fail;
2414         }
2415         ret = close_ctree(root);
2416         close(fd);
2417
2418         printf("conversion complete.\n");
2419         return 0;
2420 fail:
2421         if (fd != -1)
2422                 close(fd);
2423         fprintf(stderr, "conversion aborted.\n");
2424         return -1;
2425 }
2426
2427 static int may_rollback(struct btrfs_root *root)
2428 {
2429         struct btrfs_fs_info *info = root->fs_info;
2430         struct btrfs_multi_bio *multi = NULL;
2431         u64 bytenr;
2432         u64 length;
2433         u64 physical;
2434         u64 total_bytes;
2435         int num_stripes;
2436         int ret;
2437
2438         if (btrfs_super_num_devices(info->super_copy) != 1)
2439                 goto fail;
2440
2441         bytenr = BTRFS_SUPER_INFO_OFFSET;
2442         total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
2443
2444         while (1) {
2445                 ret = btrfs_map_block(&info->mapping_tree, WRITE, bytenr,
2446                                       &length, &multi, 0, NULL);
2447                 if (ret)
2448                         goto fail;
2449
2450                 num_stripes = multi->num_stripes;
2451                 physical = multi->stripes[0].physical;
2452                 kfree(multi);
2453
2454                 if (num_stripes != 1 || physical != bytenr)
2455                         goto fail;
2456
2457                 bytenr += length;
2458                 if (bytenr >= total_bytes)
2459                         break;
2460         }
2461         return 0;
2462 fail:
2463         return -1;
2464 }
2465
2466 static int do_rollback(const char *devname, int force)
2467 {
2468         int fd = -1;
2469         int ret;
2470         int i;
2471         struct btrfs_root *root;
2472         struct btrfs_root *ext2_root;
2473         struct btrfs_root *chunk_root;
2474         struct btrfs_dir_item *dir;
2475         struct btrfs_inode_item *inode;
2476         struct btrfs_file_extent_item *fi;
2477         struct btrfs_trans_handle *trans;
2478         struct extent_buffer *leaf;
2479         struct btrfs_block_group_cache *cache1;
2480         struct btrfs_block_group_cache *cache2;
2481         struct btrfs_key key;
2482         struct btrfs_path path;
2483         struct extent_io_tree io_tree;
2484         char *buf = NULL;
2485         char *name;
2486         u64 bytenr;
2487         u64 num_bytes;
2488         u64 root_dir;
2489         u64 objectid;
2490         u64 offset;
2491         u64 start;
2492         u64 end;
2493         u64 sb_bytenr;
2494         u64 first_free;
2495         u64 total_bytes;
2496         u32 sectorsize;
2497
2498         extent_io_tree_init(&io_tree);
2499
2500         fd = open(devname, O_RDWR);
2501         if (fd < 0) {
2502                 fprintf(stderr, "unable to open %s\n", devname);
2503                 goto fail;
2504         }
2505         root = open_ctree_fd(fd, devname, 0, O_RDWR);
2506         if (!root) {
2507                 fprintf(stderr, "unable to open ctree\n");
2508                 goto fail;
2509         }
2510         ret = may_rollback(root);
2511         if (ret < 0) {
2512                 fprintf(stderr, "unable to do rollback\n");
2513                 goto fail;
2514         }
2515
2516         sectorsize = root->sectorsize;
2517         buf = malloc(sectorsize);
2518         if (!buf) {
2519                 fprintf(stderr, "unable to allocate memory\n");
2520                 goto fail;
2521         }
2522
2523         btrfs_init_path(&path);
2524
2525         key.objectid = EXT2_IMAGE_SUBVOL_OBJECTID;
2526         key.type = BTRFS_ROOT_ITEM_KEY;
2527         key.offset = (u64)-1;
2528         ext2_root = btrfs_read_fs_root(root->fs_info, &key);
2529         if (!ext2_root || IS_ERR(ext2_root)) {
2530                 fprintf(stderr, "unable to open subvol %llu\n",
2531                         key.objectid);
2532                 goto fail;
2533         }
2534
2535         name = "image";
2536         root_dir = btrfs_root_dirid(&root->root_item);
2537         dir = btrfs_lookup_dir_item(NULL, ext2_root, &path,
2538                                    root_dir, name, strlen(name), 0);
2539         if (!dir || IS_ERR(dir)) {
2540                 fprintf(stderr, "unable to find file %s\n", name);
2541                 goto fail;
2542         }
2543         leaf = path.nodes[0];
2544         btrfs_dir_item_key_to_cpu(leaf, dir, &key);
2545         btrfs_release_path(&path);
2546
2547         objectid = key.objectid;
2548
2549         ret = btrfs_lookup_inode(NULL, ext2_root, &path, &key, 0);
2550         if (ret) {
2551                 fprintf(stderr, "unable to find inode item\n");
2552                 goto fail;
2553         }
2554         leaf = path.nodes[0];
2555         inode = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_inode_item);
2556         total_bytes = btrfs_inode_size(leaf, inode);
2557         btrfs_release_path(&path);
2558
2559         key.objectid = objectid;
2560         key.offset = 0;
2561         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
2562         ret = btrfs_search_slot(NULL, ext2_root, &key, &path, 0, 0);
2563         if (ret != 0) {
2564                 fprintf(stderr, "unable to find first file extent\n");
2565                 btrfs_release_path(&path);
2566                 goto fail;
2567         }
2568
2569         /* build mapping tree for the relocated blocks */
2570         for (offset = 0; offset < total_bytes; ) {
2571                 leaf = path.nodes[0];
2572                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2573                         ret = btrfs_next_leaf(root, &path);
2574                         if (ret != 0)
2575                                 break;  
2576                         continue;
2577                 }
2578
2579                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2580                 if (key.objectid != objectid || key.offset != offset ||
2581                     btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2582                         break;
2583
2584                 fi = btrfs_item_ptr(leaf, path.slots[0],
2585                                     struct btrfs_file_extent_item);
2586                 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
2587                         break;
2588                 if (btrfs_file_extent_compression(leaf, fi) ||
2589                     btrfs_file_extent_encryption(leaf, fi) ||
2590                     btrfs_file_extent_other_encoding(leaf, fi))
2591                         break;
2592
2593                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2594                 /* skip holes and direct mapped extents */
2595                 if (bytenr == 0 || bytenr == offset)
2596                         goto next_extent;
2597
2598                 bytenr += btrfs_file_extent_offset(leaf, fi);
2599                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
2600
2601                 cache1 = btrfs_lookup_block_group(root->fs_info, offset);
2602                 cache2 =  btrfs_lookup_block_group(root->fs_info,
2603                                                    offset + num_bytes - 1);
2604                 if (!cache1 || cache1 != cache2 ||
2605                     (!(cache1->flags & BTRFS_BLOCK_GROUP_SYSTEM) &&
2606                      !intersect_with_sb(offset, num_bytes)))
2607                         break;
2608
2609                 set_extent_bits(&io_tree, offset, offset + num_bytes - 1,
2610                                 EXTENT_LOCKED, GFP_NOFS);
2611                 set_state_private(&io_tree, offset, bytenr);
2612 next_extent:
2613                 offset += btrfs_file_extent_num_bytes(leaf, fi);
2614                 path.slots[0]++;
2615         }
2616         btrfs_release_path(&path);
2617
2618         if (offset < total_bytes) {
2619                 fprintf(stderr, "unable to build extent mapping\n");
2620                 goto fail;
2621         }
2622
2623         first_free = BTRFS_SUPER_INFO_OFFSET + 2 * sectorsize - 1;
2624         first_free &= ~((u64)sectorsize - 1);
2625         /* backup for extent #0 should exist */
2626         if(!test_range_bit(&io_tree, 0, first_free - 1, EXTENT_LOCKED, 1)) {
2627                 fprintf(stderr, "no backup for the first extent\n");
2628                 goto fail;
2629         }
2630         /* force no allocation from system block group */
2631         root->fs_info->system_allocs = -1;
2632         trans = btrfs_start_transaction(root, 1);
2633         BUG_ON(!trans);
2634         /*
2635          * recow the whole chunk tree, this will remove all chunk tree blocks
2636          * from system block group
2637          */
2638         chunk_root = root->fs_info->chunk_root;
2639         memset(&key, 0, sizeof(key));
2640         while (1) {
2641                 ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 1);
2642                 if (ret < 0)
2643                         break;
2644
2645                 ret = btrfs_next_leaf(chunk_root, &path);
2646                 if (ret)
2647                         break;
2648
2649                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
2650                 btrfs_release_path(&path);
2651         }
2652         btrfs_release_path(&path);
2653
2654         offset = 0;
2655         num_bytes = 0;
2656         while(1) {
2657                 cache1 = btrfs_lookup_block_group(root->fs_info, offset);
2658                 if (!cache1)
2659                         break;
2660
2661                 if (cache1->flags & BTRFS_BLOCK_GROUP_SYSTEM)
2662                         num_bytes += btrfs_block_group_used(&cache1->item);
2663
2664                 offset = cache1->key.objectid + cache1->key.offset;
2665         }
2666         /* only extent #0 left in system block group? */
2667         if (num_bytes > first_free) {
2668                 fprintf(stderr, "unable to empty system block group\n");
2669                 goto fail;
2670         }
2671         /* create a system chunk that maps the whole device */
2672         ret = prepare_system_chunk_sb(root->fs_info->super_copy);
2673         if (ret) {
2674                 fprintf(stderr, "unable to update system chunk\n");
2675                 goto fail;
2676         }
2677
2678         ret = btrfs_commit_transaction(trans, root);
2679         BUG_ON(ret);
2680
2681         ret = close_ctree(root);
2682         if (ret) {
2683                 fprintf(stderr, "error during close_ctree %d\n", ret);
2684                 goto fail;
2685         }
2686
2687         /* zero btrfs super block mirrors */
2688         memset(buf, 0, sectorsize);
2689         for (i = 1 ; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2690                 bytenr = btrfs_sb_offset(i);
2691                 if (bytenr >= total_bytes)
2692                         break;
2693                 ret = pwrite(fd, buf, sectorsize, bytenr);
2694         }
2695
2696         sb_bytenr = (u64)-1;
2697         /* copy all relocated blocks back */
2698         while(1) {
2699                 ret = find_first_extent_bit(&io_tree, 0, &start, &end,
2700                                             EXTENT_LOCKED);
2701                 if (ret)
2702                         break;
2703
2704                 ret = get_state_private(&io_tree, start, &bytenr);
2705                 BUG_ON(ret);
2706
2707                 clear_extent_bits(&io_tree, start, end, EXTENT_LOCKED,
2708                                   GFP_NOFS);
2709
2710                 while (start <= end) {
2711                         if (start == BTRFS_SUPER_INFO_OFFSET) {
2712                                 sb_bytenr = bytenr;
2713                                 goto next_sector;
2714                         }
2715                         ret = pread(fd, buf, sectorsize, bytenr);
2716                         if (ret < 0) {
2717                                 fprintf(stderr, "error during pread %d\n", ret);
2718                                 goto fail;
2719                         }
2720                         BUG_ON(ret != sectorsize);
2721                         ret = pwrite(fd, buf, sectorsize, start);
2722                         if (ret < 0) {
2723                                 fprintf(stderr, "error during pwrite %d\n", ret);
2724                                 goto fail;
2725                         }
2726                         BUG_ON(ret != sectorsize);
2727 next_sector:
2728                         start += sectorsize;
2729                         bytenr += sectorsize;
2730                 }
2731         }
2732
2733         ret = fsync(fd);
2734         if (ret) {
2735                 fprintf(stderr, "error during fsync %d\n", ret);
2736                 goto fail;
2737         }
2738         /*
2739          * finally, overwrite btrfs super block.
2740          */
2741         ret = pread(fd, buf, sectorsize, sb_bytenr);
2742         if (ret < 0) {
2743                 fprintf(stderr, "error during pread %d\n", ret);
2744                 goto fail;
2745         }
2746         BUG_ON(ret != sectorsize);
2747         ret = pwrite(fd, buf, sectorsize, BTRFS_SUPER_INFO_OFFSET);
2748         if (ret < 0) {
2749                 fprintf(stderr, "error during pwrite %d\n", ret);
2750                 goto fail;
2751         }
2752         BUG_ON(ret != sectorsize);
2753         ret = fsync(fd);
2754         if (ret) {
2755                 fprintf(stderr, "error during fsync %d\n", ret);
2756                 goto fail;
2757         }
2758
2759         close(fd);
2760         free(buf);
2761         extent_io_tree_cleanup(&io_tree);
2762         printf("rollback complete.\n");
2763         return 0;
2764
2765 fail:
2766         if (fd != -1)
2767                 close(fd);
2768         free(buf);
2769         fprintf(stderr, "rollback aborted.\n");
2770         return -1;
2771 }
2772
/* Print the command line synopsis and option summary to stdout. */
static void print_usage(void)
{
        static const char *const usage_lines[] = {
                "usage: btrfs-convert [-d] [-i] [-n] [-r] device\n",
                "\t-d disable data checksum\n",
                "\t-i ignore xattrs and ACLs\n",
                "\t-n disable packing of small files\n",
                "\t-r roll back to ext2fs\n",
        };
        size_t idx;

        for (idx = 0; idx < sizeof(usage_lines) / sizeof(usage_lines[0]); idx++)
                fputs(usage_lines[idx], stdout);
}
2781
2782 int main(int argc, char *argv[])
2783 {
2784         int ret;
2785         int packing = 1;
2786         int noxattr = 0;
2787         int datacsum = 1;
2788         int rollback = 0;
2789         char *file;
2790         while(1) {
2791                 int c = getopt(argc, argv, "dinr");
2792                 if (c < 0)
2793                         break;
2794                 switch(c) {
2795                         case 'd':
2796                                 datacsum = 0;
2797                                 break;
2798                         case 'i':
2799                                 noxattr = 1;
2800                                 break;
2801                         case 'n':
2802                                 packing = 0;
2803                                 break;
2804                         case 'r':
2805                                 rollback = 1;
2806                                 break;
2807                         default:
2808                                 print_usage();
2809                                 return 1;
2810                 }
2811         }
2812         argc = argc - optind;
2813         if (argc != 1) {
2814                 print_usage();
2815                 return 1;
2816         }
2817
2818         file = argv[optind];
2819         if (check_mounted(file)) {
2820                 fprintf(stderr, "%s is mounted\n", file);
2821                 return 1;
2822         }
2823
2824         if (rollback) {
2825                 ret = do_rollback(file, 0);
2826         } else {
2827                 ret = do_convert(file, datacsum, packing, noxattr);
2828         }
2829         if (ret)
2830                 return 1;
2831         return 0;
2832 }