Btrfs-progs: Use /proc/mounts instead of /etc/mtab
[platform/upstream/btrfs-progs.git] / convert.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #define _XOPEN_SOURCE 600
20 #define _GNU_SOURCE 1
21
22 #include "kerncompat.h"
23
24 #ifndef __CHECKER__
25 #include <sys/ioctl.h>
26 #include <sys/mount.h>
27 #endif
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <sys/types.h>
31 #include <sys/stat.h>
32 #include <sys/acl.h>
33 #include <fcntl.h>
34 #include <unistd.h>
35 #include <uuid/uuid.h>
36 #include <linux/fs.h>
37
38 #include "ctree.h"
39 #include "disk-io.h"
40 #include "volumes.h"
41 #include "transaction.h"
42 #include "crc32c.h"
43 #include "utils.h"
44 #include <ext2fs/ext2_fs.h>
45 #include <ext2fs/ext2fs.h>
46 #include <ext2fs/ext2_ext_attr.h>
47
/* Added to an ext2 inode number to obtain the matching btrfs objectid. */
#define INO_OFFSET (BTRFS_FIRST_FREE_OBJECTID - EXT2_ROOT_INO)

/* Size in bytes of the aligned region reserved around each btrfs superblock. */
#define STRIPE_LEN (64 * 1024)

/* Objectid for the ext2 image subvolume (presumably; its use is not visible here). */
#define EXT2_IMAGE_SUBVOL_OBJECTID BTRFS_FIRST_FREE_OBJECTID
51
52 /*
53  * Open Ext2fs in readonly mode, read block allocation bitmap and
54  * inode bitmap into memory.
55  */
56 static int open_ext2fs(const char *name, ext2_filsys *ret_fs)
57 {
58         errcode_t ret;
59         ext2_filsys ext2_fs;
60         ext2_ino_t ino;
61         ret = ext2fs_open(name, 0, 0, 0, unix_io_manager, &ext2_fs);
62         if (ret) {
63                 fprintf(stderr, "ext2fs_open: %s\n", error_message(ret));
64                 goto fail;
65         }
66         ret = ext2fs_read_inode_bitmap(ext2_fs);
67         if (ret) {
68                 fprintf(stderr, "ext2fs_read_inode_bitmap: %s\n",
69                         error_message(ret));
70                 goto fail;
71         }
72         ret = ext2fs_read_block_bitmap(ext2_fs);
73         if (ret) {
74                 fprintf(stderr, "ext2fs_read_block_bitmap: %s\n",
75                         error_message(ret));
76                 goto fail;
77         }
78         /*
79          * search each block group for a free inode. this set up
80          * uninit block/inode bitmaps appropriately.
81          */
82         ino = 1;
83         while (ino <= ext2_fs->super->s_inodes_count) {
84                 ext2_ino_t foo;
85                 ext2fs_new_inode(ext2_fs, ino, 0, NULL, &foo);
86                 ino += EXT2_INODES_PER_GROUP(ext2_fs->super);
87         }
88
89         *ret_fs = ext2_fs;
90         return 0;
91 fail:
92         return -1;
93 }
94
95 static int close_ext2fs(ext2_filsys fs)
96 {
97         ext2fs_close(fs);
98         return 0;
99 }
100
101 static int ext2_alloc_block(ext2_filsys fs, u64 goal, u64 *block_ret)
102 {
103         blk_t block;
104
105         if (!ext2fs_new_block(fs, goal, NULL, &block)) {
106                 ext2fs_fast_mark_block_bitmap(fs->block_map, block);
107                 *block_ret = block;
108                 return 0;
109         }
110         return -ENOSPC;
111 }
112
113 static int ext2_free_block(ext2_filsys fs, u64 block)
114 {
115         BUG_ON(block != (blk_t)block);
116         ext2fs_fast_unmark_block_bitmap(fs->block_map, block);
117         return 0;
118 }
119
120 static int cache_free_extents(struct btrfs_root *root, ext2_filsys ext2_fs)
121
122 {
123         int i, ret = 0;
124         blk_t block;
125         u64 bytenr;
126         u64 blocksize = ext2_fs->blocksize;
127
128         block = ext2_fs->super->s_first_data_block;
129         for (; block < ext2_fs->super->s_blocks_count; block++) {
130                 if (ext2fs_fast_test_block_bitmap(ext2_fs->block_map, block))
131                         continue;
132                 bytenr = block * blocksize;
133                 ret = set_extent_dirty(&root->fs_info->free_space_cache,
134                                        bytenr, bytenr + blocksize - 1, 0);
135                 BUG_ON(ret);
136         }
137
138         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
139                 bytenr = btrfs_sb_offset(i);
140                 bytenr &= ~((u64)STRIPE_LEN - 1);
141                 if (bytenr >= blocksize * ext2_fs->super->s_blocks_count)
142                         break;
143                 clear_extent_dirty(&root->fs_info->free_space_cache, bytenr,
144                                    bytenr + STRIPE_LEN - 1, 0);
145         }
146
147         clear_extent_dirty(&root->fs_info->free_space_cache,
148                            0, BTRFS_SUPER_INFO_OFFSET - 1, 0);
149
150         return 0;
151 }
152
153 static int custom_alloc_extent(struct btrfs_root *root, u64 num_bytes,
154                                u64 hint_byte, struct btrfs_key *ins)
155 {
156         u64 start;
157         u64 end;
158         u64 last = hint_byte;
159         int ret;
160         int wrapped = 0;
161         struct btrfs_block_group_cache *cache;
162
163         while(1) {
164                 ret = find_first_extent_bit(&root->fs_info->free_space_cache,
165                                             last, &start, &end, EXTENT_DIRTY);
166                 if (ret) {
167                         if (wrapped++ == 0) {
168                                 last = 0;
169                                 continue;
170                         } else {
171                                 goto fail;
172                         }
173                 }
174
175                 start = max(last, start);
176                 last = end + 1;
177                 if (last - start < num_bytes)
178                         continue;
179
180                 last = start + num_bytes;
181                 if (test_range_bit(&root->fs_info->pinned_extents,
182                                    start, last - 1, EXTENT_DIRTY, 0))
183                         continue;
184
185                 cache = btrfs_lookup_block_group(root->fs_info, start);
186                 BUG_ON(!cache);
187                 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM ||
188                     last > cache->key.objectid + cache->key.offset) {
189                         last = cache->key.objectid + cache->key.offset;
190                         continue;
191                 }
192
193                 clear_extent_dirty(&root->fs_info->free_space_cache,
194                                    start, start + num_bytes - 1, 0);
195
196                 ins->objectid = start;
197                 ins->offset = num_bytes;
198                 ins->type = BTRFS_EXTENT_ITEM_KEY;
199                 return 0;
200         }
201 fail:
202         fprintf(stderr, "not enough free space\n");
203         return -ENOSPC;
204 }
205
206 static int intersect_with_sb(u64 bytenr, u64 num_bytes)
207 {
208         int i;
209         u64 offset;
210
211         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
212                 offset = btrfs_sb_offset(i);
213                 offset &= ~((u64)STRIPE_LEN - 1);
214
215                 if (bytenr < offset + STRIPE_LEN &&
216                     bytenr + num_bytes > offset)
217                         return 1;
218         }
219         return 0;
220 }
221
222 static int custom_free_extent(struct btrfs_root *root, u64 bytenr,
223                               u64 num_bytes)
224 {
225         return intersect_with_sb(bytenr, num_bytes);
226 }
227
228 struct btrfs_extent_ops extent_ops = {
229         .alloc_extent = custom_alloc_extent,
230         .free_extent = custom_free_extent,
231 };
232
233 struct dir_iterate_data {
234         struct btrfs_trans_handle *trans;
235         struct btrfs_root *root;
236         struct btrfs_inode_item *inode;
237         u64 objectid;
238         u64 index_cnt;
239         u64 parent;
240         int errcode;
241 };
242
243 static u8 filetype_conversion_table[EXT2_FT_MAX] = {
244         [EXT2_FT_UNKNOWN]       = BTRFS_FT_UNKNOWN,
245         [EXT2_FT_REG_FILE]      = BTRFS_FT_REG_FILE,
246         [EXT2_FT_DIR]           = BTRFS_FT_DIR,
247         [EXT2_FT_CHRDEV]        = BTRFS_FT_CHRDEV,
248         [EXT2_FT_BLKDEV]        = BTRFS_FT_BLKDEV,
249         [EXT2_FT_FIFO]          = BTRFS_FT_FIFO,
250         [EXT2_FT_SOCK]          = BTRFS_FT_SOCK,
251         [EXT2_FT_SYMLINK]       = BTRFS_FT_SYMLINK,
252 };
253
254 static int dir_iterate_proc(ext2_ino_t dir, int entry,
255                             struct ext2_dir_entry *dirent,
256                             int offset, int blocksize,
257                             char *buf,void *priv_data)
258 {
259         int ret;
260         int file_type;
261         u64 objectid;
262         u64 inode_size;
263         char dotdot[] = "..";
264         struct btrfs_key location;
265         struct dir_iterate_data *idata = (struct dir_iterate_data *)priv_data;
266         int name_len;
267
268         name_len = dirent->name_len & 0xFF;
269
270         objectid = dirent->inode + INO_OFFSET;
271         if (!strncmp(dirent->name, dotdot, name_len)) {
272                 if (name_len == 2) {
273                         BUG_ON(idata->parent != 0);
274                         idata->parent = objectid;
275                 }
276                 return 0;
277         }
278         if (dirent->inode < EXT2_GOOD_OLD_FIRST_INO)
279                 return 0;
280
281         location.objectid = objectid;
282         location.offset = 0;
283         btrfs_set_key_type(&location, BTRFS_INODE_ITEM_KEY);
284
285         file_type = dirent->name_len >> 8;
286         BUG_ON(file_type > EXT2_FT_SYMLINK);
287         ret = btrfs_insert_dir_item(idata->trans, idata->root,
288                                     dirent->name, name_len,
289                                     idata->objectid, &location,
290                                     filetype_conversion_table[file_type],
291                                     idata->index_cnt);
292         if (ret)
293                 goto fail;
294         ret = btrfs_insert_inode_ref(idata->trans, idata->root,
295                                      dirent->name, name_len,
296                                      objectid, idata->objectid,
297                                      idata->index_cnt);
298         if (ret)
299                 goto fail;
300         idata->index_cnt++;
301         inode_size = btrfs_stack_inode_size(idata->inode) +
302                      name_len * 2;
303         btrfs_set_stack_inode_size(idata->inode, inode_size);
304         return 0;
305 fail:
306         idata->errcode = ret;
307         return BLOCK_ABORT;
308 }
309
310 static int create_dir_entries(struct btrfs_trans_handle *trans,
311                               struct btrfs_root *root, u64 objectid,
312                               struct btrfs_inode_item *btrfs_inode,
313                               ext2_filsys ext2_fs, ext2_ino_t ext2_ino)
314 {
315         int ret;
316         errcode_t err;
317         struct dir_iterate_data data = {
318                 .trans          = trans,
319                 .root           = root,
320                 .inode          = btrfs_inode,
321                 .objectid       = objectid,
322                 .index_cnt      = 2,
323                 .parent         = 0,
324                 .errcode        = 0,
325         };
326
327         err = ext2fs_dir_iterate2(ext2_fs, ext2_ino, 0, NULL,
328                                   dir_iterate_proc, &data);
329         if (err)
330                 goto error;
331         ret = data.errcode;
332         if (ret == 0 && data.parent == objectid) {
333                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
334                                              objectid, objectid, 0);
335         }
336         return ret;
337 error:
338         fprintf(stderr, "ext2fs_dir_iterate2: %s\n", error_message(err));
339         return -1;
340 }
341
342 static int read_disk_extent(struct btrfs_root *root, u64 bytenr,
343                             u32 num_bytes, char *buffer)
344 {
345         int ret;
346         struct btrfs_fs_devices *fs_devs = root->fs_info->fs_devices;
347
348         ret = pread(fs_devs->latest_bdev, buffer, num_bytes, bytenr);
349         if (ret != num_bytes)
350                 goto fail;
351         ret = 0;
352 fail:
353         if (ret > 0)
354                 ret = -1;
355         return ret;
356 }
357 /*
358  * Record a file extent. Do all the required works, such as inserting
359  * file extent item, inserting extent item and backref item into extent
360  * tree and updating block accounting.
361  */
362 static int record_file_extent(struct btrfs_trans_handle *trans,
363                               struct btrfs_root *root, u64 objectid,
364                               struct btrfs_inode_item *inode,
365                               u64 file_pos, u64 disk_bytenr,
366                               u64 num_bytes, int checksum)
367 {
368         int ret;
369         struct btrfs_fs_info *info = root->fs_info;
370         struct btrfs_root *extent_root = info->extent_root;
371         struct extent_buffer *leaf;
372         struct btrfs_file_extent_item *fi;
373         struct btrfs_key ins_key;
374         struct btrfs_path path;
375         struct btrfs_extent_item *ei;
376         u32 blocksize = root->sectorsize;
377         u64 nbytes;
378
379         if (disk_bytenr == 0) {
380                 ret = btrfs_insert_file_extent(trans, root, objectid,
381                                                 file_pos, disk_bytenr,
382                                                 num_bytes, num_bytes);
383                 return ret;
384         }
385
386         btrfs_init_path(&path);
387
388         if (checksum) {
389                 u64 offset;
390                 char *buffer;
391
392                 ret = -ENOMEM;
393                 buffer = malloc(blocksize);
394                 if (!buffer)
395                         goto fail;
396                 for (offset = 0; offset < num_bytes; offset += blocksize) {
397                         ret = read_disk_extent(root, disk_bytenr + offset,
398                                                 blocksize, buffer);
399                         if (ret)
400                                 break;
401                         ret = btrfs_csum_file_block(trans,
402                                                     root->fs_info->csum_root,
403                                                     disk_bytenr + num_bytes,
404                                                     disk_bytenr + offset,
405                                                     buffer, blocksize);
406                         if (ret)
407                                 break;
408                 }
409                 free(buffer);
410                 if (ret)
411                         goto fail;
412         }
413
414         ins_key.objectid = objectid;
415         ins_key.offset = file_pos;
416         btrfs_set_key_type(&ins_key, BTRFS_EXTENT_DATA_KEY);
417         ret = btrfs_insert_empty_item(trans, root, &path, &ins_key,
418                                       sizeof(*fi));
419         if (ret)
420                 goto fail;
421         leaf = path.nodes[0];
422         fi = btrfs_item_ptr(leaf, path.slots[0],
423                             struct btrfs_file_extent_item);
424         btrfs_set_file_extent_generation(leaf, fi, trans->transid);
425         btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
426         btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr);
427         btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes);
428         btrfs_set_file_extent_offset(leaf, fi, 0);
429         btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
430         btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
431         btrfs_set_file_extent_compression(leaf, fi, 0);
432         btrfs_set_file_extent_encryption(leaf, fi, 0);
433         btrfs_set_file_extent_other_encoding(leaf, fi, 0);
434         btrfs_mark_buffer_dirty(leaf);
435
436         nbytes = btrfs_stack_inode_nbytes(inode) + num_bytes;
437         btrfs_set_stack_inode_nbytes(inode, nbytes);
438
439         btrfs_release_path(root, &path);
440
441         ins_key.objectid = disk_bytenr;
442         ins_key.offset = num_bytes;
443         ins_key.type = BTRFS_EXTENT_ITEM_KEY;
444
445         ret = btrfs_insert_empty_item(trans, extent_root, &path,
446                                       &ins_key, sizeof(*ei));
447         if (ret == 0) {
448                 leaf = path.nodes[0];
449                 ei = btrfs_item_ptr(leaf, path.slots[0],
450                                     struct btrfs_extent_item);
451
452                 btrfs_set_extent_refs(leaf, ei, 0);
453                 btrfs_set_extent_generation(leaf, ei, 0);
454                 btrfs_set_extent_flags(leaf, ei, BTRFS_EXTENT_FLAG_DATA);
455
456                 btrfs_mark_buffer_dirty(leaf);
457
458                 ret = btrfs_update_block_group(trans, root, disk_bytenr,
459                                                num_bytes, 1, 0);
460                 if (ret)
461                         goto fail;
462         } else if (ret != -EEXIST) {
463                 goto fail;
464         }
465         btrfs_extent_post_op(trans, extent_root);
466
467         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, 0,
468                                    root->root_key.objectid,
469                                    objectid, file_pos);
470         if (ret)
471                 goto fail;
472         ret = 0;
473 fail:
474         btrfs_release_path(root, &path);
475         return ret;
476 }
477
478 static int record_file_blocks(struct btrfs_trans_handle *trans,
479                               struct btrfs_root *root, u64 objectid,
480                               struct btrfs_inode_item *inode,
481                               u64 file_block, u64 disk_block,
482                               u64 num_blocks, int checksum)
483 {
484         u64 file_pos = file_block * root->sectorsize;
485         u64 disk_bytenr = disk_block * root->sectorsize;
486         u64 num_bytes = num_blocks * root->sectorsize;
487         return record_file_extent(trans, root, objectid, inode, file_pos,
488                                   disk_bytenr, num_bytes, checksum);
489 }
490
491 struct blk_iterate_data {
492         struct btrfs_trans_handle *trans;
493         struct btrfs_root *root;
494         struct btrfs_inode_item *inode;
495         u64 objectid;
496         u64 first_block;
497         u64 disk_block;
498         u64 num_blocks;
499         u64 boundary;
500         int checksum;
501         int errcode;
502 };
503
504 static int block_iterate_proc(ext2_filsys ext2_fs,
505                               u64 disk_block, u64 file_block,
506                               struct blk_iterate_data *idata)
507 {
508         int ret;
509         int sb_region;
510         int do_barrier;
511         struct btrfs_root *root = idata->root;
512         struct btrfs_trans_handle *trans = idata->trans;
513         struct btrfs_block_group_cache *cache;
514         u64 bytenr = disk_block * root->sectorsize;
515
516         sb_region = intersect_with_sb(bytenr, root->sectorsize);
517         do_barrier = sb_region || disk_block >= idata->boundary;
518         if ((idata->num_blocks > 0 && do_barrier) ||
519             (file_block > idata->first_block + idata->num_blocks) ||
520             (disk_block != idata->disk_block + idata->num_blocks)) {
521                 if (idata->num_blocks > 0) {
522                         ret = record_file_blocks(trans, root, idata->objectid,
523                                         idata->inode, idata->first_block,
524                                         idata->disk_block, idata->num_blocks,
525                                         idata->checksum);
526                         if (ret)
527                                 goto fail;
528                         idata->first_block += idata->num_blocks;
529                         idata->num_blocks = 0;
530                 }
531                 if (file_block > idata->first_block) {
532                         ret = record_file_blocks(trans, root, idata->objectid,
533                                         idata->inode, idata->first_block,
534                                         0, file_block - idata->first_block,
535                                         idata->checksum);
536                         if (ret)
537                                 goto fail;
538                 }
539
540                 if (sb_region) {
541                         bytenr += STRIPE_LEN - 1;
542                         bytenr &= ~((u64)STRIPE_LEN - 1);
543                 } else {
544                         cache = btrfs_lookup_block_group(root->fs_info, bytenr);
545                         BUG_ON(!cache);
546                         bytenr = cache->key.objectid + cache->key.offset;
547                 }
548
549                 idata->first_block = file_block;
550                 idata->disk_block = disk_block;
551                 idata->boundary = bytenr / root->sectorsize;
552         }
553         idata->num_blocks++;
554         return 0;
555 fail:
556         idata->errcode = ret;
557         return BLOCK_ABORT;
558 }
559
560 static int __block_iterate_proc(ext2_filsys fs, blk_t *blocknr,
561                                 e2_blkcnt_t blockcnt, blk_t ref_block,
562                                 int ref_offset, void *priv_data)
563 {
564         struct blk_iterate_data *idata;
565         idata = (struct blk_iterate_data *)priv_data;
566         return block_iterate_proc(fs, *blocknr, blockcnt, idata);
567 }
568
569 /*
570  * traverse file's data blocks, record these data blocks as file extents.
571  */
572 static int create_file_extents(struct btrfs_trans_handle *trans,
573                                struct btrfs_root *root, u64 objectid,
574                                struct btrfs_inode_item *btrfs_inode,
575                                ext2_filsys ext2_fs, ext2_ino_t ext2_ino,
576                                int datacsum, int packing)
577 {
578         int ret;
579         char *buffer = NULL;
580         errcode_t err;
581         u32 last_block;
582         u32 sectorsize = root->sectorsize;
583         u64 inode_size = btrfs_stack_inode_size(btrfs_inode);
584         struct blk_iterate_data data = {
585                 .trans          = trans,
586                 .root           = root,
587                 .inode          = btrfs_inode,
588                 .objectid       = objectid,
589                 .first_block    = 0,
590                 .disk_block     = 0,
591                 .num_blocks     = 0,
592                 .boundary       = (u64)-1,
593                 .checksum       = datacsum,
594                 .errcode        = 0,
595         };
596         err = ext2fs_block_iterate2(ext2_fs, ext2_ino, BLOCK_FLAG_DATA_ONLY,
597                                     NULL, __block_iterate_proc, &data);
598         if (err)
599                 goto error;
600         ret = data.errcode;
601         if (ret)
602                 goto fail;
603         if (packing && data.first_block == 0 && data.num_blocks > 0 &&
604             inode_size <= BTRFS_MAX_INLINE_DATA_SIZE(root)) {
605                 u64 num_bytes = data.num_blocks * sectorsize;
606                 u64 disk_bytenr = data.disk_block * sectorsize;
607                 u64 nbytes;
608
609                 buffer = malloc(num_bytes);
610                 if (!buffer)
611                         return -ENOMEM;
612                 ret = read_disk_extent(root, disk_bytenr, num_bytes, buffer);
613                 if (ret)
614                         goto fail;
615                 if (num_bytes > inode_size)
616                         num_bytes = inode_size;
617                 ret = btrfs_insert_inline_extent(trans, root, objectid,
618                                                  0, buffer, num_bytes);
619                 if (ret)
620                         goto fail;
621                 nbytes = btrfs_stack_inode_nbytes(btrfs_inode) + num_bytes;
622                 btrfs_set_stack_inode_nbytes(btrfs_inode, nbytes);
623         } else if (data.num_blocks > 0) {
624                 ret = record_file_blocks(trans, root, objectid, btrfs_inode,
625                                          data.first_block, data.disk_block,
626                                          data.num_blocks, data.checksum);
627                 if (ret)
628                         goto fail;
629         }
630         data.first_block += data.num_blocks;
631         last_block = (inode_size + sectorsize - 1) / sectorsize;
632         if (last_block > data.first_block) {
633                 ret = record_file_blocks(trans, root, objectid, btrfs_inode,
634                                          data.first_block, 0, last_block -
635                                          data.first_block, data.checksum);
636         }
637 fail:
638         if (buffer)
639                 free(buffer);
640         return ret;
641 error:
642         fprintf(stderr, "ext2fs_block_iterate2: %s\n", error_message(err));
643         return -1;
644 }
645
646 static int create_symbol_link(struct btrfs_trans_handle *trans,
647                               struct btrfs_root *root, u64 objectid,
648                               struct btrfs_inode_item *btrfs_inode,
649                               ext2_filsys ext2_fs, ext2_ino_t ext2_ino,
650                               struct ext2_inode *ext2_inode)
651 {
652         int ret;
653         char *pathname;
654         u64 inode_size = btrfs_stack_inode_size(btrfs_inode);
655         if (ext2fs_inode_data_blocks(ext2_fs, ext2_inode)) {
656                 btrfs_set_stack_inode_size(btrfs_inode, inode_size + 1);
657                 ret = create_file_extents(trans, root, objectid, btrfs_inode,
658                                           ext2_fs, ext2_ino, 1, 1);
659                 btrfs_set_stack_inode_size(btrfs_inode, inode_size);
660                 return ret;
661         }
662
663         pathname = (char *)&(ext2_inode->i_block[0]);
664         BUG_ON(pathname[inode_size] != 0);
665         ret = btrfs_insert_inline_extent(trans, root, objectid, 0,
666                                          pathname, inode_size + 1);
667         btrfs_set_stack_inode_nbytes(btrfs_inode, inode_size + 1);
668         return ret;
669 }
670
/*
 * The following xattr/ACL code is based on fs/ext3/xattr.c and
 * fs/ext3/acl.c.
 */

/* Header at the start of an xattr block buffer. */
#define EXT2_XATTR_BHDR(ptr) ((struct ext2_ext_attr_header *)(ptr))

/* First entry of an xattr block: it directly follows the block header. */
#define EXT2_XATTR_BFIRST(ptr) \
	((struct ext2_ext_attr_entry *)(EXT2_XATTR_BHDR(ptr) + 1))

/*
 * In-inode xattr header: located after the good-old inode body plus the
 * inode's i_extra_isize bytes.
 */
#define EXT2_XATTR_IHDR(inode) \
	((struct ext2_ext_attr_header *) ((void *)(inode) + \
		EXT2_GOOD_OLD_INODE_SIZE + (inode)->i_extra_isize))

/* First in-inode xattr entry: it follows the header's h_magic field. */
#define EXT2_XATTR_IFIRST(inode) \
	((struct ext2_ext_attr_entry *) ((void *)EXT2_XATTR_IHDR(inode) + \
		sizeof(EXT2_XATTR_IHDR(inode)->h_magic)))
684
/*
 * Walk the xattr entry list starting at @entry and verify that every
 * successor entry begins before @end.
 *
 * Returns 0 when the terminating entry is reached, -EIO when a
 * successor would start at or beyond @end.
 */
static int ext2_xattr_check_names(struct ext2_ext_attr_entry *entry,
				  const void *end)
{
	struct ext2_ext_attr_entry *cur;
	struct ext2_ext_attr_entry *next;

	for (cur = entry; !EXT2_EXT_IS_LAST_ENTRY(cur); cur = next) {
		next = EXT2_EXT_ATTR_NEXT(cur);
		if ((void *)next >= end)
			return -EIO;
	}
	return 0;
}
698
699 static int ext2_xattr_check_block(const char *buf, size_t size)
700 {
701         int error;
702         struct ext2_ext_attr_header *header = EXT2_XATTR_BHDR(buf);
703
704         if (header->h_magic != EXT2_EXT_ATTR_MAGIC ||
705             header->h_blocks != 1)
706                 return -EIO;
707         error = ext2_xattr_check_names(EXT2_XATTR_BFIRST(buf), buf + size);
708         return error;
709 }
710
711 static int ext2_xattr_check_entry(struct ext2_ext_attr_entry *entry,
712                                   size_t size)
713 {
714         size_t value_size = entry->e_value_size;
715
716         if (entry->e_value_block != 0 || value_size > size ||
717             entry->e_value_offs + value_size > size)
718                 return -EIO;
719         return 0;
720 }
721
722 #define EXT2_ACL_VERSION        0x0001
723
724 typedef struct {
725         __le16          e_tag;
726         __le16          e_perm;
727         __le32          e_id;
728 } ext2_acl_entry;
729
730 typedef struct {
731         __le16          e_tag;
732         __le16          e_perm;
733 } ext2_acl_entry_short;
734
735 typedef struct {
736         __le32          a_version;
737 } ext2_acl_header;
738
739 static inline int ext2_acl_count(size_t size)
740 {
741         ssize_t s;
742         size -= sizeof(ext2_acl_header);
743         s = size - 4 * sizeof(ext2_acl_entry_short);
744         if (s < 0) {
745                 if (size % sizeof(ext2_acl_entry_short))
746                         return -1;
747                 return size / sizeof(ext2_acl_entry_short);
748         } else {
749                 if (s % sizeof(ext2_acl_entry))
750                         return -1;
751                 return s / sizeof(ext2_acl_entry) + 4;
752         }
753 }
754
755 #define ACL_EA_VERSION          0x0002
756
757 typedef struct {
758         __le16          e_tag;
759         __le16          e_perm;
760         __le32          e_id;
761 } acl_ea_entry;
762
763 typedef struct {
764         __le32          a_version;
765         acl_ea_entry    a_entries[0];
766 } acl_ea_header;
767
768 static inline size_t acl_ea_size(int count)
769 {
770         return sizeof(acl_ea_header) + count * sizeof(acl_ea_entry);
771 }
772
773 static int ext2_acl_to_xattr(void *dst, const void *src,
774                              size_t dst_size, size_t src_size)
775 {
776         int i, count;
777         const void *end = src + src_size;
778         acl_ea_header *ext_acl = (acl_ea_header *)dst;
779         acl_ea_entry *dst_entry = ext_acl->a_entries;
780         ext2_acl_entry *src_entry;
781
782         if (src_size < sizeof(ext2_acl_header))
783                 goto fail;
784         if (((ext2_acl_header *)src)->a_version !=
785             cpu_to_le32(EXT2_ACL_VERSION))
786                 goto fail;
787         src += sizeof(ext2_acl_header);
788         count = ext2_acl_count(src_size);
789         if (count <= 0)
790                 goto fail;
791
792         BUG_ON(dst_size < acl_ea_size(count));
793         ext_acl->a_version = cpu_to_le32(ACL_EA_VERSION);
794         for (i = 0; i < count; i++, dst_entry++) {
795                 src_entry = (ext2_acl_entry *)src;
796                 if (src + sizeof(ext2_acl_entry_short) > end)
797                         goto fail;
798                 dst_entry->e_tag = src_entry->e_tag;
799                 dst_entry->e_perm = src_entry->e_perm;
800                 switch (le16_to_cpu(src_entry->e_tag)) {
801                 case ACL_USER_OBJ:
802                 case ACL_GROUP_OBJ:
803                 case ACL_MASK:
804                 case ACL_OTHER:
805                         src += sizeof(ext2_acl_entry_short);
806                         dst_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID);
807                         break;
808                 case ACL_USER:
809                 case ACL_GROUP:
810                         src += sizeof(ext2_acl_entry);
811                         if (src > end)
812                                 goto fail;
813                         dst_entry->e_id = src_entry->e_id;
814                         break;
815                 default:
816                         goto fail;
817                 }
818         }
819         if (src != end)
820                 goto fail;
821         return 0;
822 fail:
823         return -EINVAL;
824 }
825
/*
 * Maps an ext2 xattr e_name_index to the name prefix used for the copied
 * xattr.  Slots without an initializer (0 and 5) stay NULL and are
 * rejected by copy_single_xattr().  Indexes 2 and 3 are complete names
 * for the POSIX ACL attributes rather than prefixes.
 *
 * The table is read-only, so both the pointers and the strings are const.
 */
static const char *const xattr_prefix_table[] = {
	[1] =	"user.",
	[2] =	"system.posix_acl_access",
	[3] =	"system.posix_acl_default",
	[4] =	"trusted.",
	[6] =	"security.",
};
833
834 static int copy_single_xattr(struct btrfs_trans_handle *trans,
835                              struct btrfs_root *root, u64 objectid,
836                              struct ext2_ext_attr_entry *entry,
837                              const void *data, u32 datalen)
838 {
839         int ret = 0;
840         int name_len;
841         int name_index;
842         void *databuf = NULL;
843         char namebuf[XATTR_NAME_MAX + 1];
844
845         name_index = entry->e_name_index;
846         if (name_index >= ARRAY_SIZE(xattr_prefix_table) ||
847             xattr_prefix_table[name_index] == NULL)
848                 return -EOPNOTSUPP;
849         name_len = strlen(xattr_prefix_table[name_index]) +
850                    entry->e_name_len;
851         if (name_len >= sizeof(namebuf))
852                 return -ERANGE;
853
854         if (name_index == 2 || name_index == 3) {
855                 size_t bufsize = acl_ea_size(ext2_acl_count(datalen));
856                 databuf = malloc(bufsize);
857                 if (!databuf)
858                        return -ENOMEM;
859                 ret = ext2_acl_to_xattr(databuf, data, bufsize, datalen);
860                 if (ret)
861                         goto out;
862                 data = databuf;
863                 datalen = bufsize;
864         }
865         strncpy(namebuf, xattr_prefix_table[name_index], XATTR_NAME_MAX);
866         strncat(namebuf, EXT2_EXT_ATTR_NAME(entry), entry->e_name_len);
867         if (name_len + datalen > BTRFS_LEAF_DATA_SIZE(root) -
868             sizeof(struct btrfs_item) - sizeof(struct btrfs_dir_item)) {
869                 fprintf(stderr, "skip large xattr on inode %Lu name %.*s\n",
870                         objectid - INO_OFFSET, name_len, namebuf);
871                 goto out;
872         }
873         ret = btrfs_insert_xattr_item(trans, root, namebuf, name_len,
874                                       data, datalen, objectid);
875 out:
876         if (databuf)
877                 free(databuf);
878         return ret;
879 }
880
881 static int copy_extended_attrs(struct btrfs_trans_handle *trans,
882                                struct btrfs_root *root, u64 objectid,
883                                struct btrfs_inode_item *btrfs_inode,
884                                ext2_filsys ext2_fs, ext2_ino_t ext2_ino)
885 {
886         int ret = 0;
887         int inline_ea = 0;
888         errcode_t err;
889         u32 datalen;
890         u32 block_size = ext2_fs->blocksize;
891         u32 inode_size = EXT2_INODE_SIZE(ext2_fs->super);
892         struct ext2_inode_large *ext2_inode;
893         struct ext2_ext_attr_entry *entry;
894         void *data;
895         char *buffer = NULL;
896         char inode_buf[EXT2_GOOD_OLD_INODE_SIZE];
897
898         if (inode_size <= EXT2_GOOD_OLD_INODE_SIZE) {
899                 ext2_inode = (struct ext2_inode_large *)inode_buf;
900         } else {
901                 ext2_inode = (struct ext2_inode_large *)malloc(inode_size);
902                 if (!ext2_inode)
903                        return -ENOMEM;
904         }
905         err = ext2fs_read_inode_full(ext2_fs, ext2_ino, (void *)ext2_inode,
906                                      inode_size);
907         if (err) {
908                 fprintf(stderr, "ext2fs_read_inode_full: %s\n",
909                         error_message(err));
910                 ret = -1;
911                 goto out;
912         }
913
914         if (ext2_ino > ext2_fs->super->s_first_ino &&
915             inode_size > EXT2_GOOD_OLD_INODE_SIZE) {
916                 if (EXT2_GOOD_OLD_INODE_SIZE +
917                     ext2_inode->i_extra_isize > inode_size) {
918                         ret = -EIO;
919                         goto out;
920                 }
921                 if (ext2_inode->i_extra_isize != 0 &&
922                     EXT2_XATTR_IHDR(ext2_inode)->h_magic ==
923                     EXT2_EXT_ATTR_MAGIC) {
924                         inline_ea = 1;
925                 }
926         }
927         if (inline_ea) {
928                 int total;
929                 void *end = (void *)ext2_inode + inode_size;
930                 entry = EXT2_XATTR_IFIRST(ext2_inode);
931                 total = end - (void *)entry;
932                 ret = ext2_xattr_check_names(entry, end);
933                 if (ret)
934                         goto out;
935                 while (!EXT2_EXT_IS_LAST_ENTRY(entry)) {
936                         ret = ext2_xattr_check_entry(entry, total);
937                         if (ret)
938                                 goto out;
939                         data = (void *)EXT2_XATTR_IFIRST(ext2_inode) +
940                                 entry->e_value_offs;
941                         datalen = entry->e_value_size;
942                         ret = copy_single_xattr(trans, root, objectid,
943                                                 entry, data, datalen);
944                         if (ret)
945                                 goto out;
946                         entry = EXT2_EXT_ATTR_NEXT(entry);
947                 }
948         }
949
950         if (ext2_inode->i_file_acl == 0)
951                 goto out;
952
953         buffer = malloc(block_size);
954         if (!buffer) {
955                 ret = -ENOMEM;
956                 goto out;
957         }
958         err = ext2fs_read_ext_attr(ext2_fs, ext2_inode->i_file_acl, buffer);
959         if (err) {
960                 fprintf(stderr, "ext2fs_read_ext_attr: %s\n",
961                         error_message(err));
962                 ret = -1;
963                 goto out;
964         }
965         ret = ext2_xattr_check_block(buffer, block_size);
966         if (ret)
967                 goto out;
968
969         entry = EXT2_XATTR_BFIRST(buffer);
970         while (!EXT2_EXT_IS_LAST_ENTRY(entry)) {
971                 ret = ext2_xattr_check_entry(entry, block_size);
972                 if (ret)
973                         goto out;
974                 data = buffer + entry->e_value_offs;
975                 datalen = entry->e_value_size;
976                 ret = copy_single_xattr(trans, root, objectid,
977                                         entry, data, datalen);
978                 if (ret)
979                         goto out;
980                 entry = EXT2_EXT_ATTR_NEXT(entry);
981         }
982 out:
983         if (buffer != NULL)
984                 free(buffer);
985         if ((void *)ext2_inode != inode_buf)
986                 free(ext2_inode);
987         return ret;
988 }
/* Width in bits of the minor number inside a value built by MKDEV(). */
#define MINORBITS	20
/* Pack major @ma and minor @mi: major goes above the low MINORBITS bits. */
#define MKDEV(ma, mi)	(((ma) << MINORBITS) | (mi))
991
992 static inline dev_t old_decode_dev(u16 val)
993 {
994         return MKDEV((val >> 8) & 255, val & 255);
995 }
996
997 static inline dev_t new_decode_dev(u32 dev)
998 {
999         unsigned major = (dev & 0xfff00) >> 8;
1000         unsigned minor = (dev & 0xff) | ((dev >> 12) & 0xfff00);
1001         return MKDEV(major, minor);
1002 }
1003
1004 static int copy_inode_item(struct btrfs_inode_item *dst,
1005                            struct ext2_inode *src, u32 blocksize)
1006 {
1007         btrfs_set_stack_inode_generation(dst, 1);
1008         btrfs_set_stack_inode_size(dst, src->i_size);
1009         btrfs_set_stack_inode_nbytes(dst, 0);
1010         btrfs_set_stack_inode_block_group(dst, 0);
1011         btrfs_set_stack_inode_nlink(dst, src->i_links_count);
1012         btrfs_set_stack_inode_uid(dst, src->i_uid | (src->i_uid_high << 16));
1013         btrfs_set_stack_inode_gid(dst, src->i_gid | (src->i_gid_high << 16));
1014         btrfs_set_stack_inode_mode(dst, src->i_mode);
1015         btrfs_set_stack_inode_rdev(dst, 0);
1016         btrfs_set_stack_inode_flags(dst, 0);
1017         btrfs_set_stack_timespec_sec(&dst->atime, src->i_atime);
1018         btrfs_set_stack_timespec_nsec(&dst->atime, 0);
1019         btrfs_set_stack_timespec_sec(&dst->ctime, src->i_ctime);
1020         btrfs_set_stack_timespec_nsec(&dst->ctime, 0);
1021         btrfs_set_stack_timespec_sec(&dst->mtime, src->i_mtime);
1022         btrfs_set_stack_timespec_nsec(&dst->mtime, 0);
1023         btrfs_set_stack_timespec_sec(&dst->otime, 0);
1024         btrfs_set_stack_timespec_nsec(&dst->otime, 0);
1025
1026         if (S_ISDIR(src->i_mode)) {
1027                 btrfs_set_stack_inode_size(dst, 0);
1028                 btrfs_set_stack_inode_nlink(dst, 1);
1029         }
1030         if (S_ISREG(src->i_mode)) {
1031                 btrfs_set_stack_inode_size(dst, (u64)src->i_size_high << 32 |
1032                                            (u64)src->i_size);
1033         }
1034         if (!S_ISREG(src->i_mode) && !S_ISDIR(src->i_mode) &&
1035             !S_ISLNK(src->i_mode)) {
1036                 if (src->i_block[0]) {
1037                         btrfs_set_stack_inode_rdev(dst,
1038                                 old_decode_dev(src->i_block[0]));
1039                 } else {
1040                         btrfs_set_stack_inode_rdev(dst,
1041                                 new_decode_dev(src->i_block[1]));
1042                 }
1043         }
1044         return 0;
1045 }
1046
1047 /*
1048  * copy a single inode. do all the required works, such as cloning
1049  * inode item, creating file extents and creating directory entries.
1050  */
1051 static int copy_single_inode(struct btrfs_trans_handle *trans,
1052                              struct btrfs_root *root, u64 objectid,
1053                              ext2_filsys ext2_fs, ext2_ino_t ext2_ino,
1054                              struct ext2_inode *ext2_inode,
1055                              int datacsum, int packing, int noxattr)
1056 {
1057         int ret;
1058         struct btrfs_key inode_key;
1059         struct btrfs_inode_item btrfs_inode;
1060
1061         if (ext2_inode->i_links_count == 0)
1062                 return 0;
1063
1064         copy_inode_item(&btrfs_inode, ext2_inode, ext2_fs->blocksize);
1065         if (!datacsum && S_ISREG(ext2_inode->i_mode)) {
1066                 u32 flags = btrfs_stack_inode_flags(&btrfs_inode) |
1067                             BTRFS_INODE_NODATASUM;
1068                 btrfs_set_stack_inode_flags(&btrfs_inode, flags);
1069         }
1070
1071         switch (ext2_inode->i_mode & S_IFMT) {
1072         case S_IFREG:
1073                 ret = create_file_extents(trans, root, objectid, &btrfs_inode,
1074                                         ext2_fs, ext2_ino, datacsum, packing);
1075                 break;
1076         case S_IFDIR:
1077                 ret = create_dir_entries(trans, root, objectid, &btrfs_inode,
1078                                          ext2_fs, ext2_ino);
1079                 break;
1080         case S_IFLNK:
1081                 ret = create_symbol_link(trans, root, objectid, &btrfs_inode,
1082                                          ext2_fs, ext2_ino, ext2_inode);
1083                 break;
1084         default:
1085                 ret = 0;
1086                 break;
1087         }
1088         if (ret)
1089                 return ret;
1090
1091         if (!noxattr) {
1092                 ret = copy_extended_attrs(trans, root, objectid, &btrfs_inode,
1093                                           ext2_fs, ext2_ino);
1094                 if (ret)
1095                         return ret;
1096         }
1097         inode_key.objectid = objectid;
1098         inode_key.offset = 0;
1099         btrfs_set_key_type(&inode_key, BTRFS_INODE_ITEM_KEY);
1100         ret = btrfs_insert_inode(trans, root, objectid, &btrfs_inode);
1101         return ret;
1102 }
1103
1104 static int copy_disk_extent(struct btrfs_root *root, u64 dst_bytenr,
1105                             u64 src_bytenr, u32 num_bytes)
1106 {
1107         int ret;
1108         char *buffer;
1109         struct btrfs_fs_devices *fs_devs = root->fs_info->fs_devices;
1110
1111         buffer = malloc(num_bytes);
1112         if (!buffer)
1113                 return -ENOMEM;
1114         ret = pread(fs_devs->latest_bdev, buffer, num_bytes, src_bytenr);
1115         if (ret != num_bytes)
1116                 goto fail;
1117         ret = pwrite(fs_devs->latest_bdev, buffer, num_bytes, dst_bytenr);
1118         if (ret != num_bytes)
1119                 goto fail;
1120         ret = 0;
1121 fail:
1122         free(buffer);
1123         if (ret > 0)
1124                 ret = -1;
1125         return ret;
1126 }
1127 /*
1128  * scan ext2's inode bitmap and copy all used inodes.
1129  */
1130 static int copy_inodes(struct btrfs_root *root, ext2_filsys ext2_fs,
1131                        int datacsum, int packing, int noxattr)
1132 {
1133         int ret;
1134         errcode_t err;
1135         ext2_inode_scan ext2_scan;
1136         struct ext2_inode ext2_inode;
1137         ext2_ino_t ext2_ino;
1138         u64 objectid;
1139         struct btrfs_trans_handle *trans;
1140
1141         trans = btrfs_start_transaction(root, 1);
1142         if (!trans)
1143                 return -ENOMEM;
1144         err = ext2fs_open_inode_scan(ext2_fs, 0, &ext2_scan);
1145         if (err) {
1146                 fprintf(stderr, "ext2fs_open_inode_scan: %s\n", error_message(err));
1147                 return -1;
1148         }
1149         while (!(err = ext2fs_get_next_inode(ext2_scan, &ext2_ino,
1150                                              &ext2_inode))) {
1151                 /* no more inodes */
1152                 if (ext2_ino == 0)
1153                         break;
1154                 /* skip special inode in ext2fs */
1155                 if (ext2_ino < EXT2_GOOD_OLD_FIRST_INO &&
1156                     ext2_ino != EXT2_ROOT_INO)
1157                         continue;
1158                 objectid = ext2_ino + INO_OFFSET;
1159                 ret = copy_single_inode(trans, root,
1160                                         objectid, ext2_fs, ext2_ino,
1161                                         &ext2_inode, datacsum, packing,
1162                                         noxattr);
1163                 if (ret)
1164                         return ret;
1165                 if (trans->blocks_used >= 4096) {
1166                         ret = btrfs_commit_transaction(trans, root);
1167                         BUG_ON(ret);
1168                         trans = btrfs_start_transaction(root, 1);
1169                         BUG_ON(!trans);
1170                 }
1171         }
1172         if (err) {
1173                 fprintf(stderr, "ext2fs_get_next_inode: %s\n", error_message(err));
1174                 return -1;
1175         }
1176         ret = btrfs_commit_transaction(trans, root);
1177         BUG_ON(ret);
1178
1179         return ret;
1180 }
1181
1182 /*
1183  * Construct a range of ext2fs image file.
1184  * scan block allocation bitmap, find all blocks used by the ext2fs
1185  * in this range and create file extents that point to these blocks.
1186  *
1187  * Note: Before calling the function, no file extent points to blocks
1188  *       in this range
1189  */
1190 static int create_image_file_range(struct btrfs_trans_handle *trans,
1191                                    struct btrfs_root *root, u64 objectid,
1192                                    struct btrfs_inode_item *inode,
1193                                    u64 start_byte, u64 end_byte,
1194                                    ext2_filsys ext2_fs)
1195 {
1196         u32 blocksize = ext2_fs->blocksize;
1197         u32 block = start_byte / blocksize;
1198         u32 last_block = (end_byte + blocksize - 1) / blocksize;
1199         int ret = 0;
1200         struct blk_iterate_data data = {
1201                 .trans          = trans,
1202                 .root           = root,
1203                 .inode          = inode,
1204                 .objectid       = objectid,
1205                 .first_block    = block,
1206                 .disk_block     = 0,
1207                 .num_blocks     = 0,
1208                 .boundary       = (u64)-1,
1209                 .checksum       = 0,
1210                 .errcode        = 0,
1211         };
1212         for (; start_byte < end_byte; block++, start_byte += blocksize) {
1213                 if (!ext2fs_fast_test_block_bitmap(ext2_fs->block_map, block))
1214                         continue;
1215                 ret = block_iterate_proc(NULL, block, block, &data);
1216                 if (ret & BLOCK_ABORT) {
1217                         ret = data.errcode;
1218                         goto fail;
1219                 }
1220         }
1221         if (data.num_blocks > 0) {
1222                 ret = record_file_blocks(trans, root, objectid, inode,
1223                                          data.first_block, data.disk_block,
1224                                          data.num_blocks, 0);
1225                 if (ret)
1226                         goto fail;
1227                 data.first_block += data.num_blocks;
1228         }
1229         if (last_block > data.first_block) {
1230                 ret = record_file_blocks(trans, root, objectid, inode,
1231                                          data.first_block, 0, last_block -
1232                                          data.first_block, 0);
1233                 if (ret)
1234                         goto fail;
1235         }
1236 fail:
1237         return ret;
1238 }
1239 /*
1240  * Create the ext2fs image file.
1241  */
1242 static int create_ext2_image(struct btrfs_root *root, ext2_filsys ext2_fs,
1243                              const char *name)
1244 {
1245         int ret;
1246         struct btrfs_key key;
1247         struct btrfs_key location;
1248         struct btrfs_path path;
1249         struct btrfs_inode_item btrfs_inode;
1250         struct btrfs_inode_item *inode_item;
1251         struct extent_buffer *leaf;
1252         struct btrfs_fs_info *fs_info = root->fs_info;
1253         struct btrfs_root *extent_root = fs_info->extent_root;
1254         struct btrfs_trans_handle *trans;
1255         struct btrfs_extent_item *ei;
1256         struct btrfs_extent_inline_ref *iref;
1257         struct btrfs_extent_data_ref *dref;
1258         u64 bytenr;
1259         u64 num_bytes;
1260         u64 objectid;
1261         u64 last_byte;
1262         u64 first_free;
1263         u64 total_bytes;
1264         u32 sectorsize = root->sectorsize;
1265
1266         total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
1267         first_free =  BTRFS_SUPER_INFO_OFFSET + sectorsize * 2 - 1;
1268         first_free &= ~((u64)sectorsize - 1);
1269
1270         memset(&btrfs_inode, 0, sizeof(btrfs_inode));
1271         btrfs_set_stack_inode_generation(&btrfs_inode, 1);
1272         btrfs_set_stack_inode_size(&btrfs_inode, total_bytes);
1273         btrfs_set_stack_inode_nlink(&btrfs_inode, 1);
1274         btrfs_set_stack_inode_nbytes(&btrfs_inode, 0);
1275         btrfs_set_stack_inode_mode(&btrfs_inode, S_IFREG | 0400);
1276         btrfs_set_stack_inode_flags(&btrfs_inode, BTRFS_INODE_NODATASUM |
1277                                     BTRFS_INODE_READONLY);
1278         btrfs_init_path(&path);
1279         trans = btrfs_start_transaction(root, 1);
1280         BUG_ON(!trans);
1281
1282         objectid = btrfs_root_dirid(&root->root_item);
1283         ret = btrfs_find_free_objectid(trans, root, objectid, &objectid);
1284         if (ret)
1285                 goto fail;
1286
1287         /*
1288          * copy blocks covered by extent #0 to new positions. extent #0 is
1289          * special, we can't rely on relocate_extents_range to relocate it.
1290          */
1291         for (last_byte = 0; last_byte < first_free; last_byte += sectorsize) {
1292                 ret = custom_alloc_extent(root, sectorsize, 0, &key);
1293                 if (ret)
1294                         goto fail;
1295                 ret = copy_disk_extent(root, key.objectid, last_byte,
1296                                        sectorsize);
1297                 if (ret)
1298                         goto fail;
1299                 ret = record_file_extent(trans, root, objectid,
1300                                          &btrfs_inode, last_byte,
1301                                          key.objectid, sectorsize, 0);
1302                 if (ret)
1303                         goto fail;
1304         }
1305
1306         while(1) {
1307                 key.objectid = last_byte;
1308                 key.offset = 0;
1309                 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1310                 ret = btrfs_search_slot(trans, fs_info->extent_root,
1311                                         &key, &path, 0, 0);
1312                 if (ret < 0)
1313                         goto fail;
1314 next:
1315                 leaf = path.nodes[0];
1316                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1317                         ret = btrfs_next_leaf(extent_root, &path);
1318                         if (ret < 0)
1319                                 goto fail;
1320                         if (ret > 0)
1321                                 break;
1322                         leaf = path.nodes[0];
1323                 }
1324                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1325                 if (last_byte > key.objectid ||
1326                     key.type != BTRFS_EXTENT_ITEM_KEY) {
1327                         path.slots[0]++;
1328                         goto next;
1329                 }
1330
1331                 bytenr = key.objectid;
1332                 num_bytes = key.offset;
1333                 ei = btrfs_item_ptr(leaf, path.slots[0],
1334                                     struct btrfs_extent_item);
1335                 if (!(btrfs_extent_flags(leaf, ei) & BTRFS_EXTENT_FLAG_DATA)) {
1336                         path.slots[0]++;
1337                         goto next;
1338                 }
1339
1340                 BUG_ON(btrfs_item_size_nr(leaf, path.slots[0]) != sizeof(*ei) +
1341                        btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY));
1342
1343                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
1344                 key.type = btrfs_extent_inline_ref_type(leaf, iref);
1345                 BUG_ON(key.type != BTRFS_EXTENT_DATA_REF_KEY);
1346                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1347                 if (btrfs_extent_data_ref_root(leaf, dref) !=
1348                     BTRFS_FS_TREE_OBJECTID) {
1349                         path.slots[0]++;
1350                         goto next;
1351                 }
1352
1353                 if (bytenr > last_byte) {
1354                         ret = create_image_file_range(trans, root, objectid,
1355                                                       &btrfs_inode, last_byte,
1356                                                       bytenr, ext2_fs);
1357                         if (ret)
1358                                 goto fail;
1359                 }
1360                 ret = record_file_extent(trans, root, objectid, &btrfs_inode,
1361                                          bytenr, bytenr, num_bytes, 0);
1362                 if (ret)
1363                         goto fail;
1364                 last_byte = bytenr + num_bytes;
1365                 btrfs_release_path(extent_root, &path);
1366
1367                 if (trans->blocks_used >= 4096) {
1368                         ret = btrfs_commit_transaction(trans, root);
1369                         BUG_ON(ret);
1370                         trans = btrfs_start_transaction(root, 1);
1371                         BUG_ON(!trans);
1372                 }
1373         }
1374         btrfs_release_path(root, &path);
1375         if (total_bytes > last_byte) {
1376                 ret = create_image_file_range(trans, root, objectid,
1377                                               &btrfs_inode, last_byte,
1378                                               total_bytes, ext2_fs);
1379                 if (ret)
1380                         goto fail;
1381         }
1382
1383         ret = btrfs_insert_inode(trans, root, objectid, &btrfs_inode);
1384         if (ret)
1385                 goto fail;
1386
1387         location.objectid = objectid;
1388         location.offset = 0;
1389         btrfs_set_key_type(&location, BTRFS_INODE_ITEM_KEY);
1390         ret = btrfs_insert_dir_item(trans, root, name, strlen(name),
1391                                     btrfs_root_dirid(&root->root_item),
1392                                     &location, EXT2_FT_REG_FILE, objectid);
1393         if (ret)
1394                 goto fail;
1395         ret = btrfs_insert_inode_ref(trans, root, name, strlen(name),
1396                                      objectid,
1397                                      btrfs_root_dirid(&root->root_item),
1398                                      objectid);
1399         if (ret)
1400                 goto fail;
1401         location.objectid = btrfs_root_dirid(&root->root_item);
1402         location.offset = 0;
1403         btrfs_set_key_type(&location, BTRFS_INODE_ITEM_KEY);
1404         ret = btrfs_lookup_inode(trans, root, &path, &location, 1);
1405         if (ret)
1406                 goto fail;
1407         leaf = path.nodes[0];
1408         inode_item = btrfs_item_ptr(leaf, path.slots[0],
1409                                     struct btrfs_inode_item);
1410         btrfs_set_inode_size(leaf, inode_item, strlen(name) * 2 +
1411                              btrfs_inode_size(leaf, inode_item));
1412         btrfs_mark_buffer_dirty(leaf);
1413         btrfs_release_path(root, &path);
1414         ret = btrfs_commit_transaction(trans, root);
1415         BUG_ON(ret);
1416 fail:
1417         btrfs_release_path(root, &path);
1418         return ret;
1419 }
1420
1421 struct btrfs_root *link_subvol(struct btrfs_root *root, const char *base,
1422                                u64 root_objectid)
1423 {
1424         struct btrfs_trans_handle *trans;
1425         struct btrfs_fs_info *fs_info = root->fs_info;
1426         struct btrfs_root *tree_root = fs_info->tree_root;
1427         struct btrfs_root *new_root = NULL;
1428         struct btrfs_path *path;
1429         struct btrfs_inode_item *inode_item;
1430         struct extent_buffer *leaf;
1431         struct btrfs_key key;
1432         u64 dirid = btrfs_root_dirid(&root->root_item);
1433         u64 index = 2;
1434         char buf[64];
1435         int i;
1436         int ret;
1437
1438         path = btrfs_alloc_path();
1439         BUG_ON(!path);
1440
1441         key.objectid = dirid;
1442         key.type = BTRFS_DIR_INDEX_KEY;
1443         key.offset = (u64)-1;
1444
1445         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1446         BUG_ON(ret <= 0);
1447
1448         if (path->slots[0] > 0) {
1449                 path->slots[0]--;
1450                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1451                 if (key.objectid == dirid && key.type == BTRFS_DIR_INDEX_KEY)
1452                         index = key.offset + 1;
1453         }
1454         btrfs_release_path(root, path);
1455
1456         trans = btrfs_start_transaction(root, 1);
1457         BUG_ON(!trans);
1458
1459         key.objectid = dirid;
1460         key.offset = 0;
1461         key.type =  BTRFS_INODE_ITEM_KEY;
1462
1463         ret = btrfs_lookup_inode(trans, root, path, &key, 1);
1464         BUG_ON(ret);
1465         leaf = path->nodes[0];
1466         inode_item = btrfs_item_ptr(leaf, path->slots[0],
1467                                     struct btrfs_inode_item);
1468
1469         key.objectid = root_objectid;
1470         key.offset = (u64)-1;
1471         key.type = BTRFS_ROOT_ITEM_KEY;
1472
1473         strcpy(buf, base);
1474         for (i = 0; i < 1024; i++) {
1475                 ret = btrfs_insert_dir_item(trans, root, buf, strlen(buf),
1476                                             dirid, &key, BTRFS_FT_DIR, index);
1477                 if (ret != -EEXIST)
1478                         break;
1479                 sprintf(buf, "%s%d", base, i);
1480         }
1481         if (ret)
1482                 goto fail;
1483
1484         btrfs_set_inode_size(leaf, inode_item, strlen(buf) * 2 +
1485                              btrfs_inode_size(leaf, inode_item));
1486         btrfs_mark_buffer_dirty(leaf);
1487         btrfs_release_path(root, path);
1488
1489         /* add the backref first */
1490         ret = btrfs_add_root_ref(trans, tree_root, root_objectid,
1491                                  BTRFS_ROOT_BACKREF_KEY,
1492                                  root->root_key.objectid,
1493                                  dirid, index, buf, strlen(buf));
1494         BUG_ON(ret);
1495
1496         /* now add the forward ref */
1497         ret = btrfs_add_root_ref(trans, tree_root, root->root_key.objectid,
1498                                  BTRFS_ROOT_REF_KEY, root_objectid,
1499                                  dirid, index, buf, strlen(buf));
1500
1501         ret = btrfs_commit_transaction(trans, root);
1502         BUG_ON(ret);
1503
1504         new_root = btrfs_read_fs_root(fs_info, &key);
1505         if (IS_ERR(new_root))
1506                 new_root = NULL;
1507 fail:
1508         btrfs_free_path(path);
1509         return new_root;
1510 }
1511
1512 static int create_chunk_mapping(struct btrfs_trans_handle *trans,
1513                                 struct btrfs_root *root)
1514 {
1515         struct btrfs_fs_info *info = root->fs_info;
1516         struct btrfs_root *chunk_root = info->chunk_root;
1517         struct btrfs_root *extent_root = info->extent_root;
1518         struct btrfs_device *device;
1519         struct btrfs_block_group_cache *cache;
1520         struct btrfs_dev_extent *extent;
1521         struct extent_buffer *leaf;
1522         struct btrfs_chunk chunk;
1523         struct btrfs_key key;
1524         struct btrfs_path path;
1525         u64 cur_start;
1526         u64 total_bytes;
1527         u64 chunk_objectid;
1528         int ret;
1529
1530         btrfs_init_path(&path);
1531
1532         total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
1533         chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
1534
1535         BUG_ON(list_empty(&info->fs_devices->devices));
1536         device = list_entry(info->fs_devices->devices.next,
1537                             struct btrfs_device, dev_list);
1538         BUG_ON(device->devid != info->fs_devices->latest_devid);
1539
1540         /* delete device extent created by make_btrfs */
1541         key.objectid = device->devid;
1542         key.offset = 0;
1543         key.type = BTRFS_DEV_EXTENT_KEY;
1544         ret = btrfs_search_slot(trans, device->dev_root, &key, &path, -1, 1);
1545         if (ret < 0)
1546                 goto err;
1547
1548         BUG_ON(ret > 0);
1549         ret = btrfs_del_item(trans, device->dev_root, &path);
1550         if (ret)
1551                 goto err;
1552         btrfs_release_path(device->dev_root, &path);
1553
1554         /* delete chunk item created by make_btrfs */
1555         key.objectid = chunk_objectid;
1556         key.offset = 0;
1557         key.type = BTRFS_CHUNK_ITEM_KEY;
1558         ret = btrfs_search_slot(trans, chunk_root, &key, &path, -1, 1);
1559         if (ret < 0)
1560                 goto err;
1561
1562         BUG_ON(ret > 0);
1563         ret = btrfs_del_item(trans, chunk_root, &path);
1564         if (ret)
1565                 goto err;
1566         btrfs_release_path(chunk_root, &path);
1567
1568         /* for each block group, create device extent and chunk item */
1569         cur_start = 0;
1570         while (cur_start < total_bytes) {
1571                 cache = btrfs_lookup_block_group(root->fs_info, cur_start);
1572                 BUG_ON(!cache);
1573
1574                 /* insert device extent */
1575                 key.objectid = device->devid;
1576                 key.offset = cache->key.objectid;
1577                 key.type = BTRFS_DEV_EXTENT_KEY;
1578                 ret = btrfs_insert_empty_item(trans, device->dev_root, &path,
1579                                               &key, sizeof(*extent));
1580                 if (ret)
1581                         goto err;
1582
1583                 leaf = path.nodes[0];
1584                 extent = btrfs_item_ptr(leaf, path.slots[0],
1585                                         struct btrfs_dev_extent);
1586
1587                 btrfs_set_dev_extent_chunk_tree(leaf, extent,
1588                                                 chunk_root->root_key.objectid);
1589                 btrfs_set_dev_extent_chunk_objectid(leaf, extent,
1590                                                     chunk_objectid);
1591                 btrfs_set_dev_extent_chunk_offset(leaf, extent,
1592                                                   cache->key.objectid);
1593                 btrfs_set_dev_extent_length(leaf, extent, cache->key.offset);
1594                 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
1595                     (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
1596                     BTRFS_UUID_SIZE);
1597                 btrfs_mark_buffer_dirty(leaf);
1598                 btrfs_release_path(device->dev_root, &path);
1599
1600                 /* insert chunk item */
1601                 btrfs_set_stack_chunk_length(&chunk, cache->key.offset);
1602                 btrfs_set_stack_chunk_owner(&chunk,
1603                                             extent_root->root_key.objectid);
1604                 btrfs_set_stack_chunk_stripe_len(&chunk, STRIPE_LEN);
1605                 btrfs_set_stack_chunk_type(&chunk, cache->flags);
1606                 btrfs_set_stack_chunk_io_align(&chunk, device->io_align);
1607                 btrfs_set_stack_chunk_io_width(&chunk, device->io_width);
1608                 btrfs_set_stack_chunk_sector_size(&chunk, device->sector_size);
1609                 btrfs_set_stack_chunk_num_stripes(&chunk, 1);
1610                 btrfs_set_stack_chunk_sub_stripes(&chunk, 0);
1611                 btrfs_set_stack_stripe_devid(&chunk.stripe, device->devid);
1612                 btrfs_set_stack_stripe_offset(&chunk.stripe,
1613                                               cache->key.objectid);
1614                 memcpy(&chunk.stripe.dev_uuid, device->uuid, BTRFS_UUID_SIZE);
1615
1616                 key.objectid = chunk_objectid;
1617                 key.offset = cache->key.objectid;
1618                 key.type = BTRFS_CHUNK_ITEM_KEY;
1619
1620                 ret = btrfs_insert_item(trans, chunk_root, &key, &chunk,
1621                                         btrfs_chunk_item_size(1));
1622                 if (ret)
1623                         goto err;
1624
1625                 cur_start = cache->key.objectid + cache->key.offset;
1626         }
1627
1628         device->bytes_used = total_bytes;
1629         ret = btrfs_update_device(trans, device);
1630 err:
1631         btrfs_release_path(device->dev_root, &path);
1632         return ret;
1633 }
1634
1635 static int create_subvol(struct btrfs_trans_handle *trans,
1636                          struct btrfs_root *root, u64 root_objectid)
1637 {
1638         struct extent_buffer *tmp;
1639         struct btrfs_root *new_root;
1640         struct btrfs_key key;
1641         struct btrfs_root_item root_item;
1642         int ret;
1643
1644         ret = btrfs_copy_root(trans, root, root->node, &tmp,
1645                               root_objectid);
1646         BUG_ON(ret);
1647
1648         memcpy(&root_item, &root->root_item, sizeof(root_item));
1649         btrfs_set_root_bytenr(&root_item, tmp->start);
1650         btrfs_set_root_level(&root_item, btrfs_header_level(tmp));
1651         btrfs_set_root_generation(&root_item, trans->transid);
1652         free_extent_buffer(tmp);
1653
1654         key.objectid = root_objectid;
1655         key.type = BTRFS_ROOT_ITEM_KEY;
1656         key.offset = trans->transid;
1657         ret = btrfs_insert_root(trans, root->fs_info->tree_root,
1658                                 &key, &root_item);
1659
1660         key.offset = (u64)-1;
1661         new_root = btrfs_read_fs_root(root->fs_info, &key);
1662         BUG_ON(!new_root || IS_ERR(new_root));
1663
1664         ret = btrfs_make_root_dir(trans, new_root, BTRFS_FIRST_FREE_OBJECTID);
1665         BUG_ON(ret);
1666
1667         return 0;
1668 }
1669
1670 static int init_btrfs(struct btrfs_root *root)
1671 {
1672         int ret;
1673         struct btrfs_key location;
1674         struct btrfs_trans_handle *trans;
1675         struct btrfs_fs_info *fs_info = root->fs_info;
1676         struct extent_buffer *tmp;
1677
1678         trans = btrfs_start_transaction(root, 1);
1679         BUG_ON(!trans);
1680         ret = btrfs_make_block_groups(trans, root);
1681         if (ret)
1682                 goto err;
1683         ret = btrfs_fix_block_accounting(trans, root);
1684         if (ret)
1685                 goto err;
1686         ret = create_chunk_mapping(trans, root);
1687         if (ret)
1688                 goto err;
1689         ret = btrfs_make_root_dir(trans, fs_info->tree_root,
1690                                   BTRFS_ROOT_TREE_DIR_OBJECTID);
1691         if (ret)
1692                 goto err;
1693         memcpy(&location, &root->root_key, sizeof(location));
1694         location.offset = (u64)-1;
1695         ret = btrfs_insert_dir_item(trans, fs_info->tree_root, "default", 7,
1696                                 btrfs_super_root_dir(fs_info->super_copy),
1697                                 &location, BTRFS_FT_DIR, 0);
1698         if (ret)
1699                 goto err;
1700         ret = btrfs_insert_inode_ref(trans, fs_info->tree_root, "default", 7,
1701                                 location.objectid,
1702                                 btrfs_super_root_dir(fs_info->super_copy), 0);
1703         if (ret)
1704                 goto err;
1705         btrfs_set_root_dirid(&fs_info->fs_root->root_item,
1706                              BTRFS_FIRST_FREE_OBJECTID);
1707
1708         /* subvol for ext2 image file */
1709         ret = create_subvol(trans, root, EXT2_IMAGE_SUBVOL_OBJECTID);
1710         BUG_ON(ret);
1711         /* subvol for data relocation */
1712         ret = create_subvol(trans, root, BTRFS_DATA_RELOC_TREE_OBJECTID);
1713         BUG_ON(ret);
1714
1715         ret = __btrfs_cow_block(trans, fs_info->csum_root,
1716                                 fs_info->csum_root->node, NULL, 0, &tmp, 0, 0);
1717         BUG_ON(ret);
1718         free_extent_buffer(tmp);
1719
1720         ret = btrfs_commit_transaction(trans, root);
1721         BUG_ON(ret);
1722 err:
1723         return ret;
1724 }
1725
1726 /*
1727  * Migrate super block to it's default position and zero 0 ~ 16k
1728  */
1729 static int migrate_super_block(int fd, u64 old_bytenr, u32 sectorsize)
1730 {
1731         int ret;
1732         struct extent_buffer *buf;
1733         struct btrfs_super_block *super;
1734         u32 len;
1735         u32 bytenr;
1736
1737         BUG_ON(sectorsize < sizeof(*super));
1738         buf = malloc(sizeof(*buf) + sectorsize);
1739         if (!buf)
1740                 return -ENOMEM;
1741
1742         buf->len = sectorsize;
1743         ret = pread(fd, buf->data, sectorsize, old_bytenr);
1744         if (ret != sectorsize)
1745                 goto fail;
1746
1747         super = (struct btrfs_super_block *)buf->data;
1748         BUG_ON(btrfs_super_bytenr(super) != old_bytenr);
1749         btrfs_set_super_bytenr(super, BTRFS_SUPER_INFO_OFFSET);
1750
1751         csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
1752         ret = pwrite(fd, buf->data, sectorsize, BTRFS_SUPER_INFO_OFFSET);
1753         if (ret != sectorsize)
1754                 goto fail;
1755
1756         ret = fsync(fd);
1757         if (ret)
1758                 goto fail;
1759
1760         memset(buf->data, 0, sectorsize);
1761         for (bytenr = 0; bytenr < BTRFS_SUPER_INFO_OFFSET; ) {
1762                 len = BTRFS_SUPER_INFO_OFFSET - bytenr;
1763                 if (len > sectorsize)
1764                         len = sectorsize;
1765                 ret = pwrite(fd, buf->data, len, bytenr);
1766                 if (ret != len) {
1767                         fprintf(stderr, "unable to zero fill device\n");
1768                         break;
1769                 }
1770                 bytenr += len;
1771         }
1772         ret = 0;
1773         fsync(fd);
1774 fail:
1775         free(buf);
1776         if (ret > 0)
1777                 ret = -1;
1778         return ret;
1779 }
1780
1781 static int prepare_system_chunk_sb(struct btrfs_super_block *super)
1782 {
1783         struct btrfs_chunk *chunk;
1784         struct btrfs_disk_key *key;
1785         u32 sectorsize = btrfs_super_sectorsize(super);
1786
1787         key = (struct btrfs_disk_key *)(super->sys_chunk_array);
1788         chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
1789                                        sizeof(struct btrfs_disk_key));
1790
1791         btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
1792         btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
1793         btrfs_set_disk_key_offset(key, 0);
1794
1795         btrfs_set_stack_chunk_length(chunk, btrfs_super_total_bytes(super));
1796         btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
1797         btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024);
1798         btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
1799         btrfs_set_stack_chunk_io_align(chunk, sectorsize);
1800         btrfs_set_stack_chunk_io_width(chunk, sectorsize);
1801         btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
1802         btrfs_set_stack_chunk_num_stripes(chunk, 1);
1803         btrfs_set_stack_chunk_sub_stripes(chunk, 0);
1804         chunk->stripe.devid = super->dev_item.devid;
1805         chunk->stripe.offset = cpu_to_le64(0);
1806         memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
1807         btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
1808         return 0;
1809 }
1810
1811 static int prepare_system_chunk(int fd, u64 sb_bytenr, u32 sectorsize)
1812 {
1813         int ret;
1814         struct extent_buffer *buf;
1815         struct btrfs_super_block *super;
1816
1817         BUG_ON(sectorsize < sizeof(*super));
1818         buf = malloc(sizeof(*buf) + sectorsize);
1819         if (!buf)
1820                 return -ENOMEM;
1821
1822         buf->len = sectorsize;
1823         ret = pread(fd, buf->data, sectorsize, sb_bytenr);
1824         if (ret != sectorsize)
1825                 goto fail;
1826
1827         super = (struct btrfs_super_block *)buf->data;
1828         BUG_ON(btrfs_super_bytenr(super) != sb_bytenr);
1829         BUG_ON(btrfs_super_num_devices(super) != 1);
1830
1831         ret = prepare_system_chunk_sb(super);
1832         if (ret)
1833                 goto fail;
1834
1835         csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
1836         ret = pwrite(fd, buf->data, sectorsize, sb_bytenr);
1837         if (ret != sectorsize)
1838                 goto fail;
1839
1840         ret = 0;
1841 fail:
1842         free(buf);
1843         if (ret > 0)
1844                 ret = -1;
1845         return ret;
1846 }
1847
1848 static int relocate_one_reference(struct btrfs_trans_handle *trans,
1849                                   struct btrfs_root *root,
1850                                   u64 extent_start, u64 extent_size,
1851                                   struct btrfs_key *extent_key,
1852                                   struct extent_io_tree *reloc_tree)
1853 {
1854         struct extent_buffer *leaf;
1855         struct btrfs_file_extent_item *fi;
1856         struct btrfs_key key;
1857         struct btrfs_path path;
1858         struct btrfs_inode_item inode;
1859         struct blk_iterate_data data;
1860         u64 bytenr;
1861         u64 num_bytes;
1862         u64 cur_offset;
1863         u64 new_pos;
1864         u64 nbytes;
1865         u64 sector_end;
1866         u32 sectorsize = root->sectorsize;
1867         unsigned long ptr;
1868         int datacsum;
1869         int fd;
1870         int ret;
1871
1872         btrfs_init_path(&path);
1873         ret = btrfs_search_slot(trans, root, extent_key, &path, -1, 1);
1874         if (ret)
1875                 goto fail;
1876
1877         leaf = path.nodes[0];
1878         fi = btrfs_item_ptr(leaf, path.slots[0],
1879                             struct btrfs_file_extent_item);
1880         BUG_ON(btrfs_file_extent_offset(leaf, fi) > 0);
1881         if (extent_start != btrfs_file_extent_disk_bytenr(leaf, fi) ||
1882             extent_size != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
1883                 ret = 1;
1884                 goto fail;
1885         }
1886
1887         bytenr = extent_start + btrfs_file_extent_offset(leaf, fi);
1888         num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
1889
1890         ret = btrfs_del_item(trans, root, &path);
1891         if (ret)
1892                 goto fail;
1893
1894         ret = btrfs_free_extent(trans, root, extent_start, extent_size, 0,
1895                                 root->root_key.objectid,
1896                                 extent_key->objectid, extent_key->offset);
1897         if (ret)
1898                 goto fail;
1899
1900         btrfs_release_path(root, &path);
1901
1902         key.objectid = extent_key->objectid;
1903         key.offset = 0;
1904         key.type =  BTRFS_INODE_ITEM_KEY;
1905         ret = btrfs_lookup_inode(trans, root, &path, &key, 0);
1906         if (ret)
1907                 goto fail;
1908
1909         leaf = path.nodes[0];
1910         ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
1911         read_extent_buffer(leaf, &inode, ptr, sizeof(inode));
1912         btrfs_release_path(root, &path);
1913
1914         BUG_ON(num_bytes & (sectorsize - 1));
1915         nbytes = btrfs_stack_inode_nbytes(&inode) - num_bytes;
1916         btrfs_set_stack_inode_nbytes(&inode, nbytes);
1917         datacsum = !(btrfs_stack_inode_flags(&inode) & BTRFS_INODE_NODATASUM);
1918
1919         data = (struct blk_iterate_data) {
1920                 .trans          = trans,
1921                 .root           = root,
1922                 .inode          = &inode,
1923                 .objectid       = extent_key->objectid,
1924                 .first_block    = extent_key->offset / sectorsize,
1925                 .disk_block     = 0,
1926                 .num_blocks     = 0,
1927                 .boundary       = (u64)-1,
1928                 .checksum       = datacsum,
1929                 .errcode        = 0,
1930         };
1931
1932         cur_offset = extent_key->offset;
1933         while (num_bytes > 0) {
1934                 sector_end = bytenr + sectorsize - 1;
1935                 if (test_range_bit(reloc_tree, bytenr, sector_end,
1936                                    EXTENT_LOCKED, 1)) {
1937                         ret = get_state_private(reloc_tree, bytenr, &new_pos);
1938                         BUG_ON(ret);
1939                 } else {
1940                         ret = custom_alloc_extent(root, sectorsize, 0, &key);
1941                         if (ret)
1942                                 goto fail;
1943                         new_pos = key.objectid;
1944
1945                         if (cur_offset == extent_key->offset) {
1946                                 fd = root->fs_info->fs_devices->latest_bdev;
1947                                 readahead(fd, bytenr, num_bytes);
1948                         }
1949                         ret = copy_disk_extent(root, new_pos, bytenr,
1950                                                sectorsize);
1951                         if (ret)
1952                                 goto fail;
1953                         ret = set_extent_bits(reloc_tree, bytenr, sector_end,
1954                                               EXTENT_LOCKED, GFP_NOFS);
1955                         BUG_ON(ret);
1956                         ret = set_state_private(reloc_tree, bytenr, new_pos);
1957                         BUG_ON(ret);
1958                 }
1959
1960                 ret = block_iterate_proc(NULL, new_pos / sectorsize,
1961                                          cur_offset / sectorsize, &data);
1962                 if (ret & BLOCK_ABORT) {
1963                         ret = data.errcode;
1964                         goto fail;
1965                 }
1966
1967                 cur_offset += sectorsize;
1968                 bytenr += sectorsize;
1969                 num_bytes -= sectorsize;
1970         }
1971
1972         if (data.num_blocks > 0) {
1973                 ret = record_file_blocks(trans, root,
1974                                          extent_key->objectid, &inode,
1975                                          data.first_block, data.disk_block,
1976                                          data.num_blocks, datacsum);
1977                 if (ret)
1978                         goto fail;
1979         }
1980
1981         key.objectid = extent_key->objectid;
1982         key.offset = 0;
1983         key.type =  BTRFS_INODE_ITEM_KEY;
1984         ret = btrfs_lookup_inode(trans, root, &path, &key, 1);
1985         if (ret)
1986                 goto fail;
1987
1988         leaf = path.nodes[0];
1989         ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
1990         write_extent_buffer(leaf, &inode, ptr, sizeof(inode));
1991         btrfs_mark_buffer_dirty(leaf);
1992         btrfs_release_path(root, &path);
1993
1994 fail:
1995         btrfs_release_path(root, &path);
1996         return ret;
1997 }
1998
1999 static int relocate_extents_range(struct btrfs_root *fs_root,
2000                                   struct btrfs_root *ext2_root,
2001                                   u64 start_byte, u64 end_byte)
2002 {
2003         struct btrfs_fs_info *info = fs_root->fs_info;
2004         struct btrfs_root *extent_root = info->extent_root;
2005         struct btrfs_root *cur_root = NULL;
2006         struct btrfs_trans_handle *trans;
2007         struct btrfs_extent_data_ref *dref;
2008         struct btrfs_extent_inline_ref *iref;
2009         struct btrfs_extent_item *ei;
2010         struct extent_buffer *leaf;
2011         struct btrfs_key key;
2012         struct btrfs_key extent_key;
2013         struct btrfs_path path;
2014         struct extent_io_tree reloc_tree;
2015         unsigned long ptr;
2016         unsigned long end;
2017         u64 cur_byte;
2018         u64 num_bytes;
2019         u64 ref_root;
2020         u64 num_extents;
2021         int pass = 0;
2022         int ret;
2023
2024         btrfs_init_path(&path);
2025         extent_io_tree_init(&reloc_tree);
2026
2027         key.objectid = start_byte;
2028         key.offset = 0;
2029         key.type = BTRFS_EXTENT_ITEM_KEY;
2030         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2031         if (ret < 0)
2032                 goto fail;
2033         if (ret > 0) {
2034                 ret = btrfs_previous_item(extent_root, &path, 0,
2035                                           BTRFS_EXTENT_ITEM_KEY);
2036                 if (ret < 0)
2037                         goto fail;
2038                 if (ret == 0) {
2039                         leaf = path.nodes[0];
2040                         btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2041                         if (key.objectid + key.offset > start_byte)
2042                                 start_byte = key.objectid;
2043                 }
2044         }
2045         btrfs_release_path(extent_root, &path);
2046 again:
2047         cur_root = (pass % 2 == 0) ? ext2_root : fs_root;
2048         num_extents = 0;
2049
2050         trans = btrfs_start_transaction(cur_root, 1);
2051         BUG_ON(!trans);
2052
2053         cur_byte = start_byte;
2054         while (1) {
2055                 key.objectid = cur_byte;
2056                 key.offset = 0;
2057                 key.type = BTRFS_EXTENT_ITEM_KEY;
2058                 ret = btrfs_search_slot(trans, extent_root,
2059                                         &key, &path, 0, 0);
2060                 if (ret < 0)
2061                         goto fail;
2062 next:
2063                 leaf = path.nodes[0];
2064                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2065                         ret = btrfs_next_leaf(extent_root, &path);
2066                         if (ret < 0)
2067                                 goto fail;
2068                         if (ret > 0)
2069                                 break;
2070                         leaf = path.nodes[0];
2071                 }
2072
2073                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2074                 if (key.objectid < cur_byte ||
2075                     key.type != BTRFS_EXTENT_ITEM_KEY) {
2076                         path.slots[0]++;
2077                         goto next;
2078                 }
2079                 if (key.objectid >= end_byte)
2080                         break;
2081
2082                 num_extents++;
2083
2084                 cur_byte = key.objectid;
2085                 num_bytes = key.offset;
2086                 ei = btrfs_item_ptr(leaf, path.slots[0],
2087                                     struct btrfs_extent_item);
2088                 BUG_ON(!(btrfs_extent_flags(leaf, ei) &
2089                          BTRFS_EXTENT_FLAG_DATA));
2090
2091                 ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
2092                 end = ptr + btrfs_item_size_nr(leaf, path.slots[0]);
2093
2094                 ptr += sizeof(struct btrfs_extent_item);
2095
2096                 while (ptr < end) {
2097                         iref = (struct btrfs_extent_inline_ref *)ptr;
2098                         key.type = btrfs_extent_inline_ref_type(leaf, iref);
2099                         BUG_ON(key.type != BTRFS_EXTENT_DATA_REF_KEY);
2100                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
2101                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
2102                         extent_key.objectid =
2103                                 btrfs_extent_data_ref_objectid(leaf, dref);
2104                         extent_key.offset =
2105                                 btrfs_extent_data_ref_offset(leaf, dref);
2106                         extent_key.type = BTRFS_EXTENT_DATA_KEY;
2107                         BUG_ON(btrfs_extent_data_ref_count(leaf, dref) != 1);
2108
2109                         if (ref_root == cur_root->root_key.objectid)
2110                                 break;
2111
2112                         ptr += btrfs_extent_inline_ref_size(key.type);
2113                 }
2114
2115                 if (ptr >= end) {
2116                         path.slots[0]++;
2117                         goto next;
2118                 }
2119
2120                 ret = relocate_one_reference(trans, cur_root, cur_byte,
2121                                              num_bytes, &extent_key,
2122                                              &reloc_tree);
2123                 if (ret < 0)
2124                         goto fail;
2125
2126                 cur_byte += num_bytes;
2127                 btrfs_release_path(extent_root, &path);
2128
2129                 if (trans->blocks_used >= 4096) {
2130                         ret = btrfs_commit_transaction(trans, cur_root);
2131                         BUG_ON(ret);
2132                         trans = btrfs_start_transaction(cur_root, 1);
2133                         BUG_ON(!trans);
2134                 }
2135         }
2136         btrfs_release_path(cur_root, &path);
2137
2138         ret = btrfs_commit_transaction(trans, cur_root);
2139         BUG_ON(ret);
2140
2141         if (num_extents > 0 && pass++ < 16)
2142                 goto again;
2143
2144         ret = (num_extents > 0) ? -1 : 0;
2145 fail:
2146         btrfs_release_path(cur_root, &path);
2147         extent_io_tree_cleanup(&reloc_tree);
2148         return ret;
2149 }
2150
2151 /*
2152  * relocate data in system chunk
2153  */
2154 static int cleanup_sys_chunk(struct btrfs_root *fs_root,
2155                              struct btrfs_root *ext2_root)
2156 {
2157         struct btrfs_block_group_cache *cache;
2158         int i, ret = 0;
2159         u64 offset = 0;
2160         u64 end_byte;
2161
2162         while(1) {
2163                 cache = btrfs_lookup_block_group(fs_root->fs_info, offset);
2164                 if (!cache)
2165                         break;
2166
2167                 end_byte = cache->key.objectid + cache->key.offset;
2168                 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
2169                         ret = relocate_extents_range(fs_root, ext2_root,
2170                                                      cache->key.objectid,
2171                                                      end_byte);
2172                         if (ret)
2173                                 goto fail;
2174                 }
2175                 offset = end_byte;
2176         }
2177         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2178                 offset = btrfs_sb_offset(i);
2179                 offset &= ~((u64)STRIPE_LEN - 1);
2180
2181                 ret = relocate_extents_range(fs_root, ext2_root,
2182                                              offset, offset + STRIPE_LEN);
2183                 if (ret)
2184                         goto fail;
2185         }
2186         ret = 0;
2187 fail:
2188         return ret;
2189 }
2190
2191 static int fixup_chunk_mapping(struct btrfs_root *root)
2192 {
2193         struct btrfs_trans_handle *trans;
2194         struct btrfs_fs_info *info = root->fs_info;
2195         struct btrfs_root *chunk_root = info->chunk_root;
2196         struct extent_buffer *leaf;
2197         struct btrfs_key key;
2198         struct btrfs_path path;
2199         struct btrfs_chunk chunk;
2200         unsigned long ptr;
2201         u32 size;
2202         u64 type;
2203         int ret;
2204
2205         btrfs_init_path(&path);
2206
2207         trans = btrfs_start_transaction(root, 1);
2208         BUG_ON(!trans);
2209
2210         /*
2211          * recow the whole chunk tree. this will move all chunk tree blocks
2212          * into system block group.
2213          */
2214         memset(&key, 0, sizeof(key));
2215         while (1) {
2216                 ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 1);
2217                 if (ret < 0)
2218                         goto err;
2219
2220                 ret = btrfs_next_leaf(chunk_root, &path);
2221                 if (ret < 0)
2222                         goto err;
2223                 if (ret > 0)
2224                         break;
2225
2226                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
2227                 btrfs_release_path(chunk_root, &path);
2228         }
2229         btrfs_release_path(chunk_root, &path);
2230
2231         /* fixup the system chunk array in super block */
2232         btrfs_set_super_sys_array_size(info->super_copy, 0);
2233
2234         key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
2235         key.offset = 0;
2236         key.type = BTRFS_CHUNK_ITEM_KEY;
2237
2238         ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 0);
2239         if (ret < 0)
2240                 goto err;
2241         BUG_ON(ret != 0);
2242         while(1) {
2243                 leaf = path.nodes[0];
2244                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2245                         ret = btrfs_next_leaf(chunk_root, &path);
2246                         if (ret < 0)
2247                                 goto err;
2248                         if (ret > 0)
2249                                 break;
2250                         leaf = path.nodes[0];
2251                 }
2252                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2253                 if (key.type != BTRFS_CHUNK_ITEM_KEY)
2254                         goto next;
2255
2256                 ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
2257                 size = btrfs_item_size_nr(leaf, path.slots[0]);
2258                 BUG_ON(size != sizeof(chunk));
2259                 read_extent_buffer(leaf, &chunk, ptr, size);
2260                 type = btrfs_stack_chunk_type(&chunk);
2261
2262                 if (!(type & BTRFS_BLOCK_GROUP_SYSTEM))
2263                         goto next;
2264
2265                 ret = btrfs_add_system_chunk(trans, chunk_root, &key,
2266                                              &chunk, size);
2267                 if (ret)
2268                         goto err;
2269 next:
2270                 path.slots[0]++;
2271         }
2272
2273         ret = btrfs_commit_transaction(trans, root);
2274         BUG_ON(ret);
2275 err:
2276         btrfs_release_path(chunk_root, &path);
2277         return ret;
2278 }
2279
2280 int do_convert(const char *devname, int datacsum, int packing, int noxattr)
2281 {
2282         int i, ret;
2283         int fd = -1;
2284         u32 blocksize;
2285         u64 blocks[7];
2286         u64 total_bytes;
2287         u64 super_bytenr;
2288         ext2_filsys ext2_fs;
2289         struct btrfs_root *root;
2290         struct btrfs_root *ext2_root;
2291
2292         ret = open_ext2fs(devname, &ext2_fs);
2293         if (ret) {
2294                 fprintf(stderr, "unable to open the Ext2fs\n");
2295                 goto fail;
2296         }
2297         blocksize = ext2_fs->blocksize;
2298         total_bytes = (u64)ext2_fs->super->s_blocks_count * blocksize;
2299         if (blocksize < 4096) {
2300                 fprintf(stderr, "block size is too small\n");
2301                 goto fail;
2302         }
2303         if (!(ext2_fs->super->s_feature_incompat &
2304               EXT2_FEATURE_INCOMPAT_FILETYPE)) {
2305                 fprintf(stderr, "filetype feature is missing\n");
2306                 goto fail;
2307         }
2308         for (i = 0; i < 7; i++) {
2309                 ret = ext2_alloc_block(ext2_fs, 0, blocks + i);
2310                 if (ret) {
2311                         fprintf(stderr, "not enough free space\n");
2312                         goto fail;
2313                 }
2314                 blocks[i] *= blocksize;
2315         }
2316         super_bytenr = blocks[0];
2317         fd = open(devname, O_RDWR);
2318         if (fd < 0) {
2319                 fprintf(stderr, "unable to open %s\n", devname);
2320                 goto fail;
2321         }
2322         ret = make_btrfs(fd, devname, ext2_fs->super->s_volume_name,
2323                          blocks, total_bytes, blocksize, blocksize,
2324                          blocksize, blocksize);
2325         if (ret) {
2326                 fprintf(stderr, "unable to create initial ctree\n");
2327                 goto fail;
2328         }
2329         /* create a system chunk that maps the whole device */
2330         ret = prepare_system_chunk(fd, super_bytenr, blocksize);
2331         if (ret) {
2332                 fprintf(stderr, "unable to update system chunk\n");
2333                 goto fail;
2334         }
2335         root = open_ctree_fd(fd, devname, super_bytenr, O_RDWR);
2336         if (!root) {
2337                 fprintf(stderr, "unable to open ctree\n");
2338                 goto fail;
2339         }
2340         ret = cache_free_extents(root, ext2_fs);
2341         if (ret) {
2342                 fprintf(stderr, "error during cache_free_extents %d\n", ret);
2343                 goto fail;
2344         }
2345         root->fs_info->extent_ops = &extent_ops;
2346         /* recover block allocation bitmap */
2347         for (i = 0; i < 7; i++) {
2348                 blocks[i] /= blocksize;
2349                 ext2_free_block(ext2_fs, blocks[i]);
2350         }
2351         ret = init_btrfs(root);
2352         if (ret) {
2353                 fprintf(stderr, "unable to setup the root tree\n");
2354                 goto fail;
2355         }
2356         printf("creating btrfs metadata.\n");
2357         ret = copy_inodes(root, ext2_fs, datacsum, packing, noxattr);
2358         if (ret) {
2359                 fprintf(stderr, "error during copy_inodes %d\n", ret);
2360                 goto fail;
2361         }
2362         printf("creating ext2fs image file.\n");
2363         ext2_root = link_subvol(root, "ext2_saved", EXT2_IMAGE_SUBVOL_OBJECTID);
2364         if (!ext2_root) {
2365                 fprintf(stderr, "unable to create subvol\n");
2366                 goto fail;
2367         }
2368         ret = create_ext2_image(ext2_root, ext2_fs, "image");
2369         if (ret) {
2370                 fprintf(stderr, "error during create_ext2_image %d\n", ret);
2371                 goto fail;
2372         }
2373         printf("cleaning up system chunk.\n");
2374         ret = cleanup_sys_chunk(root, ext2_root);
2375         if (ret) {
2376                 fprintf(stderr, "error during cleanup_sys_chunk %d\n", ret);
2377                 goto fail;
2378         }
2379         ret = close_ctree(root);
2380         if (ret) {
2381                 fprintf(stderr, "error during close_ctree %d\n", ret);
2382                 goto fail;
2383         }
2384         close_ext2fs(ext2_fs);
2385
2386         /*
2387          * If this step succeed, we get a mountable btrfs. Otherwise
2388          * the ext2fs is left unchanged.
2389          */
2390         ret = migrate_super_block(fd, super_bytenr, blocksize);
2391         if (ret) {
2392                 fprintf(stderr, "unable to migrate super block\n");
2393                 goto fail;
2394         }
2395
2396         root = open_ctree_fd(fd, devname, 0, O_RDWR);
2397         if (!root) {
2398                 fprintf(stderr, "unable to open ctree\n");
2399                 goto fail;
2400         }
2401         /* move chunk tree into system chunk. */
2402         ret = fixup_chunk_mapping(root);
2403         if (ret) {
2404                 fprintf(stderr, "error during fixup_chunk_tree\n");
2405                 goto fail;
2406         }
2407         ret = close_ctree(root);
2408         close(fd);
2409
2410         printf("conversion complete.\n");
2411         return 0;
2412 fail:
2413         if (fd != -1)
2414                 close(fd);
2415         fprintf(stderr, "conversion aborted.\n");
2416         return -1;
2417 }
2418
2419 static int may_rollback(struct btrfs_root *root)
2420 {
2421         struct btrfs_fs_info *info = root->fs_info;
2422         struct btrfs_multi_bio *multi = NULL;
2423         u64 bytenr;
2424         u64 length;
2425         u64 physical;
2426         u64 total_bytes;
2427         int num_stripes;
2428         int ret;
2429
2430         if (btrfs_super_num_devices(info->super_copy) != 1)
2431                 goto fail;
2432
2433         bytenr = BTRFS_SUPER_INFO_OFFSET;
2434         total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
2435
2436         while (1) {
2437                 ret = btrfs_map_block(&info->mapping_tree, WRITE, bytenr,
2438                                       &length, &multi, 0, NULL);
2439                 if (ret)
2440                         goto fail;
2441
2442                 num_stripes = multi->num_stripes;
2443                 physical = multi->stripes[0].physical;
2444                 kfree(multi);
2445
2446                 if (num_stripes != 1 || physical != bytenr)
2447                         goto fail;
2448
2449                 bytenr += length;
2450                 if (bytenr >= total_bytes)
2451                         break;
2452         }
2453         return 0;
2454 fail:
2455         return -1;
2456 }
2457
2458 int do_rollback(const char *devname, int force)
2459 {
2460         int fd = -1;
2461         int ret;
2462         int i;
2463         struct btrfs_root *root;
2464         struct btrfs_root *ext2_root;
2465         struct btrfs_root *chunk_root;
2466         struct btrfs_dir_item *dir;
2467         struct btrfs_inode_item *inode;
2468         struct btrfs_file_extent_item *fi;
2469         struct btrfs_trans_handle *trans;
2470         struct extent_buffer *leaf;
2471         struct btrfs_block_group_cache *cache1;
2472         struct btrfs_block_group_cache *cache2;
2473         struct btrfs_key key;
2474         struct btrfs_path path;
2475         struct extent_io_tree io_tree;
2476         char *buf = NULL;
2477         char *name;
2478         u64 bytenr;
2479         u64 num_bytes;
2480         u64 root_dir;
2481         u64 objectid;
2482         u64 offset;
2483         u64 start;
2484         u64 end;
2485         u64 sb_bytenr;
2486         u64 first_free;
2487         u64 total_bytes;
2488         u32 sectorsize;
2489
2490         extent_io_tree_init(&io_tree);
2491
2492         fd = open(devname, O_RDWR);
2493         if (fd < 0) {
2494                 fprintf(stderr, "unable to open %s\n", devname);
2495                 goto fail;
2496         }
2497         root = open_ctree_fd(fd, devname, 0, O_RDWR);
2498         if (!root) {
2499                 fprintf(stderr, "unable to open ctree\n");
2500                 goto fail;
2501         }
2502         ret = may_rollback(root);
2503         if (ret < 0) {
2504                 fprintf(stderr, "unable to do rollback\n");
2505                 goto fail;
2506         }
2507
2508         sectorsize = root->sectorsize;
2509         buf = malloc(sectorsize);
2510         if (!buf) {
2511                 fprintf(stderr, "unable to allocate memory\n");
2512                 goto fail;
2513         }
2514
2515         btrfs_init_path(&path);
2516
2517         key.objectid = EXT2_IMAGE_SUBVOL_OBJECTID;
2518         key.type = BTRFS_ROOT_ITEM_KEY;
2519         key.offset = (u64)-1;
2520         ext2_root = btrfs_read_fs_root(root->fs_info, &key);
2521         if (!ext2_root || IS_ERR(ext2_root)) {
2522                 fprintf(stderr, "unable to open subvol %llu\n",
2523                         key.objectid);
2524                 goto fail;
2525         }
2526
2527         name = "image";
2528         root_dir = btrfs_root_dirid(&root->root_item);
2529         dir = btrfs_lookup_dir_item(NULL, ext2_root, &path,
2530                                    root_dir, name, strlen(name), 0);
2531         if (!dir || IS_ERR(dir)) {
2532                 fprintf(stderr, "unable to find file %s\n", name);
2533                 goto fail;
2534         }
2535         leaf = path.nodes[0];
2536         btrfs_dir_item_key_to_cpu(leaf, dir, &key);
2537         btrfs_release_path(ext2_root, &path);
2538
2539         objectid = key.objectid;
2540
2541         ret = btrfs_lookup_inode(NULL, ext2_root, &path, &key, 0);
2542         if (ret) {
2543                 fprintf(stderr, "unable to find inode item\n");
2544                 goto fail;
2545         }
2546         leaf = path.nodes[0];
2547         inode = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_inode_item);
2548         total_bytes = btrfs_inode_size(leaf, inode);
2549         btrfs_release_path(ext2_root, &path);
2550
2551         key.objectid = objectid;
2552         key.offset = 0;
2553         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
2554         ret = btrfs_search_slot(NULL, ext2_root, &key, &path, 0, 0);
2555         if (ret != 0) {
2556                 fprintf(stderr, "unable to find first file extent\n");
2557                 btrfs_release_path(ext2_root, &path);
2558                 goto fail;
2559         }
2560
2561         /* build mapping tree for the relocated blocks */
2562         for (offset = 0; offset < total_bytes; ) {
2563                 leaf = path.nodes[0];
2564                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2565                         ret = btrfs_next_leaf(root, &path);
2566                         if (ret != 0)
2567                                 break;  
2568                         continue;
2569                 }
2570
2571                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2572                 if (key.objectid != objectid || key.offset != offset ||
2573                     btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2574                         break;
2575
2576                 fi = btrfs_item_ptr(leaf, path.slots[0],
2577                                     struct btrfs_file_extent_item);
2578                 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
2579                         break;
2580                 if (btrfs_file_extent_compression(leaf, fi) ||
2581                     btrfs_file_extent_encryption(leaf, fi) ||
2582                     btrfs_file_extent_other_encoding(leaf, fi))
2583                         break;
2584
2585                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2586                 /* skip holes and direct mapped extents */
2587                 if (bytenr == 0 || bytenr == offset)
2588                         goto next_extent;
2589
2590                 bytenr += btrfs_file_extent_offset(leaf, fi);
2591                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
2592
2593                 cache1 = btrfs_lookup_block_group(root->fs_info, offset);
2594                 cache2 =  btrfs_lookup_block_group(root->fs_info,
2595                                                    offset + num_bytes - 1);
2596                 if (!cache1 || cache1 != cache2 ||
2597                     (!(cache1->flags & BTRFS_BLOCK_GROUP_SYSTEM) &&
2598                      !intersect_with_sb(offset, num_bytes)))
2599                         break;
2600
2601                 set_extent_bits(&io_tree, offset, offset + num_bytes - 1,
2602                                 EXTENT_LOCKED, GFP_NOFS);
2603                 set_state_private(&io_tree, offset, bytenr);
2604 next_extent:
2605                 offset += btrfs_file_extent_num_bytes(leaf, fi);
2606                 path.slots[0]++;
2607         }
2608         btrfs_release_path(ext2_root, &path);
2609
2610         if (offset < total_bytes) {
2611                 fprintf(stderr, "unable to build extent mapping\n");
2612                 goto fail;
2613         }
2614
2615         first_free = BTRFS_SUPER_INFO_OFFSET + 2 * sectorsize - 1;
2616         first_free &= ~((u64)sectorsize - 1);
2617         /* backup for extent #0 should exist */
2618         if(!test_range_bit(&io_tree, 0, first_free - 1, EXTENT_LOCKED, 1)) {
2619                 fprintf(stderr, "no backup for the first extent\n");
2620                 goto fail;
2621         }
2622         /* force no allocation from system block group */
2623         root->fs_info->system_allocs = -1;
2624         trans = btrfs_start_transaction(root, 1);
2625         BUG_ON(!trans);
2626         /*
2627          * recow the whole chunk tree, this will remove all chunk tree blocks
2628          * from system block group
2629          */
2630         chunk_root = root->fs_info->chunk_root;
2631         memset(&key, 0, sizeof(key));
2632         while (1) {
2633                 ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 1);
2634                 if (ret < 0)
2635                         break;
2636
2637                 ret = btrfs_next_leaf(chunk_root, &path);
2638                 if (ret)
2639                         break;
2640
2641                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
2642                 btrfs_release_path(chunk_root, &path);
2643         }
2644         btrfs_release_path(chunk_root, &path);
2645
2646         offset = 0;
2647         num_bytes = 0;
2648         while(1) {
2649                 cache1 = btrfs_lookup_block_group(root->fs_info, offset);
2650                 if (!cache1)
2651                         break;
2652
2653                 if (cache1->flags & BTRFS_BLOCK_GROUP_SYSTEM)
2654                         num_bytes += btrfs_block_group_used(&cache1->item);
2655
2656                 offset = cache1->key.objectid + cache1->key.offset;
2657         }
2658         /* only extent #0 left in system block group? */
2659         if (num_bytes > first_free) {
2660                 fprintf(stderr, "unable to empty system block group\n");
2661                 goto fail;
2662         }
2663         /* create a system chunk that maps the whole device */
2664         ret = prepare_system_chunk_sb(root->fs_info->super_copy);
2665         if (ret) {
2666                 fprintf(stderr, "unable to update system chunk\n");
2667                 goto fail;
2668         }
2669
2670         ret = btrfs_commit_transaction(trans, root);
2671         BUG_ON(ret);
2672
2673         ret = close_ctree(root);
2674         if (ret) {
2675                 fprintf(stderr, "error during close_ctree %d\n", ret);
2676                 goto fail;
2677         }
2678
2679         /* zero btrfs super block mirrors */
2680         memset(buf, 0, sectorsize);
2681         for (i = 1 ; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2682                 bytenr = btrfs_sb_offset(i);
2683                 if (bytenr >= total_bytes)
2684                         break;
2685                 ret = pwrite(fd, buf, sectorsize, bytenr);
2686         }
2687
2688         sb_bytenr = (u64)-1;
2689         /* copy all relocated blocks back */
2690         while(1) {
2691                 ret = find_first_extent_bit(&io_tree, 0, &start, &end,
2692                                             EXTENT_LOCKED);
2693                 if (ret)
2694                         break;
2695
2696                 ret = get_state_private(&io_tree, start, &bytenr);
2697                 BUG_ON(ret);
2698
2699                 clear_extent_bits(&io_tree, start, end, EXTENT_LOCKED,
2700                                   GFP_NOFS);
2701
2702                 while (start <= end) {
2703                         if (start == BTRFS_SUPER_INFO_OFFSET) {
2704                                 sb_bytenr = bytenr;
2705                                 goto next_sector;
2706                         }
2707                         ret = pread(fd, buf, sectorsize, bytenr);
2708                         if (ret < 0) {
2709                                 fprintf(stderr, "error during pread %d\n", ret);
2710                                 goto fail;
2711                         }
2712                         BUG_ON(ret != sectorsize);
2713                         ret = pwrite(fd, buf, sectorsize, start);
2714                         if (ret < 0) {
2715                                 fprintf(stderr, "error during pwrite %d\n", ret);
2716                                 goto fail;
2717                         }
2718                         BUG_ON(ret != sectorsize);
2719 next_sector:
2720                         start += sectorsize;
2721                         bytenr += sectorsize;
2722                 }
2723         }
2724
2725         ret = fsync(fd);
2726         if (ret) {
2727                 fprintf(stderr, "error during fsync %d\n", ret);
2728                 goto fail;
2729         }
2730         /*
2731          * finally, overwrite btrfs super block.
2732          */
2733         ret = pread(fd, buf, sectorsize, sb_bytenr);
2734         if (ret < 0) {
2735                 fprintf(stderr, "error during pread %d\n", ret);
2736                 goto fail;
2737         }
2738         BUG_ON(ret != sectorsize);
2739         ret = pwrite(fd, buf, sectorsize, BTRFS_SUPER_INFO_OFFSET);
2740         if (ret < 0) {
2741                 fprintf(stderr, "error during pwrite %d\n", ret);
2742                 goto fail;
2743         }
2744         BUG_ON(ret != sectorsize);
2745         ret = fsync(fd);
2746         if (ret) {
2747                 fprintf(stderr, "error during fsync %d\n", ret);
2748                 goto fail;
2749         }
2750
2751         close(fd);
2752         free(buf);
2753         extent_io_tree_cleanup(&io_tree);
2754         printf("rollback complete.\n");
2755         return 0;
2756
2757 fail:
2758         if (fd != -1)
2759                 close(fd);
2760         free(buf);
2761         fprintf(stderr, "rollback aborted.\n");
2762         return -1;
2763 }
2764
/* Write the command line synopsis and the option summary to stdout. */
static void print_usage(void)
{
	static const char *const usage_lines[] = {
		"usage: btrfs-convert [-d] [-i] [-n] [-r] device\n",
		"\t-d disable data checksum\n",
		"\t-i ignore xattrs and ACLs\n",
		"\t-n disable packing of small files\n",
		"\t-r roll back to ext2fs\n",
	};
	unsigned int idx;

	for (idx = 0; idx < sizeof(usage_lines) / sizeof(usage_lines[0]); idx++)
		fputs(usage_lines[idx], stdout);
}
2773
2774 int main(int argc, char *argv[])
2775 {
2776         int ret;
2777         int packing = 1;
2778         int noxattr = 0;
2779         int datacsum = 1;
2780         int rollback = 0;
2781         char *file;
2782         while(1) {
2783                 int c = getopt(argc, argv, "dinr");
2784                 if (c < 0)
2785                         break;
2786                 switch(c) {
2787                         case 'd':
2788                                 datacsum = 0;
2789                                 break;
2790                         case 'i':
2791                                 noxattr = 1;
2792                                 break;
2793                         case 'n':
2794                                 packing = 0;
2795                                 break;
2796                         case 'r':
2797                                 rollback = 1;
2798                                 break;
2799                         default:
2800                                 print_usage();
2801                                 return 1;
2802                 }
2803         }
2804         argc = argc - optind;
2805         if (argc != 1) {
2806                 print_usage();
2807                 return 1;
2808         }
2809
2810         file = argv[optind];
2811         if (check_mounted(file)) {
2812                 fprintf(stderr, "%s is mounted\n", file);
2813                 return 1;
2814         }
2815
2816         if (rollback) {
2817                 ret = do_rollback(file, 0);
2818         } else {
2819                 ret = do_convert(file, datacsum, packing, noxattr);
2820         }
2821         if (ret)
2822                 return 1;
2823         return 0;
2824 }