Btrfs-progs: enhance btrfs qgroup to print the result as a table
[platform/upstream/btrfs-progs.git] / btrfs-convert.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #define _XOPEN_SOURCE 600
20 #define _GNU_SOURCE 1
21
22 #include "kerncompat.h"
23
24 #include <sys/ioctl.h>
25 #include <sys/mount.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <sys/acl.h>
31 #include <fcntl.h>
32 #include <unistd.h>
33 #include <uuid/uuid.h>
34
35 #include "ctree.h"
36 #include "disk-io.h"
37 #include "volumes.h"
38 #include "transaction.h"
39 #include "crc32c.h"
40 #include "utils.h"
41 #include <ext2fs/ext2_fs.h>
42 #include <ext2fs/ext2fs.h>
43 #include <ext2fs/ext2_ext_attr.h>
44
45 #define INO_OFFSET (BTRFS_FIRST_FREE_OBJECTID - EXT2_ROOT_INO)
46 #define STRIPE_LEN (64 * 1024)
47 #define EXT2_IMAGE_SUBVOL_OBJECTID BTRFS_FIRST_FREE_OBJECTID
48
49 /*
50  * Open Ext2fs in readonly mode, read block allocation bitmap and
51  * inode bitmap into memory.
52  */
53 static int open_ext2fs(const char *name, ext2_filsys *ret_fs)
54 {
55         errcode_t ret;
56         ext2_filsys ext2_fs;
57         ext2_ino_t ino;
58         ret = ext2fs_open(name, 0, 0, 0, unix_io_manager, &ext2_fs);
59         if (ret) {
60                 fprintf(stderr, "ext2fs_open: %s\n", error_message(ret));
61                 goto fail;
62         }
63         ret = ext2fs_read_inode_bitmap(ext2_fs);
64         if (ret) {
65                 fprintf(stderr, "ext2fs_read_inode_bitmap: %s\n",
66                         error_message(ret));
67                 goto fail;
68         }
69         ret = ext2fs_read_block_bitmap(ext2_fs);
70         if (ret) {
71                 fprintf(stderr, "ext2fs_read_block_bitmap: %s\n",
72                         error_message(ret));
73                 goto fail;
74         }
75         /*
76          * search each block group for a free inode. this set up
77          * uninit block/inode bitmaps appropriately.
78          */
79         ino = 1;
80         while (ino <= ext2_fs->super->s_inodes_count) {
81                 ext2_ino_t foo;
82                 ext2fs_new_inode(ext2_fs, ino, 0, NULL, &foo);
83                 ino += EXT2_INODES_PER_GROUP(ext2_fs->super);
84         }
85
86         *ret_fs = ext2_fs;
87         return 0;
88 fail:
89         return -1;
90 }
91
92 static int close_ext2fs(ext2_filsys fs)
93 {
94         ext2fs_close(fs);
95         return 0;
96 }
97
98 static int ext2_alloc_block(ext2_filsys fs, u64 goal, u64 *block_ret)
99 {
100         blk_t block;
101
102         if (!ext2fs_new_block(fs, goal, NULL, &block)) {
103                 ext2fs_fast_mark_block_bitmap(fs->block_map, block);
104                 *block_ret = block;
105                 return 0;
106         }
107         return -ENOSPC;
108 }
109
110 static int ext2_free_block(ext2_filsys fs, u64 block)
111 {
112         BUG_ON(block != (blk_t)block);
113         ext2fs_fast_unmark_block_bitmap(fs->block_map, block);
114         return 0;
115 }
116
117 static int cache_free_extents(struct btrfs_root *root, ext2_filsys ext2_fs)
118
119 {
120         int i, ret = 0;
121         blk_t block;
122         u64 bytenr;
123         u64 blocksize = ext2_fs->blocksize;
124
125         block = ext2_fs->super->s_first_data_block;
126         for (; block < ext2_fs->super->s_blocks_count; block++) {
127                 if (ext2fs_fast_test_block_bitmap(ext2_fs->block_map, block))
128                         continue;
129                 bytenr = block * blocksize;
130                 ret = set_extent_dirty(&root->fs_info->free_space_cache,
131                                        bytenr, bytenr + blocksize - 1, 0);
132                 BUG_ON(ret);
133         }
134
135         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
136                 bytenr = btrfs_sb_offset(i);
137                 bytenr &= ~((u64)STRIPE_LEN - 1);
138                 if (bytenr >= blocksize * ext2_fs->super->s_blocks_count)
139                         break;
140                 clear_extent_dirty(&root->fs_info->free_space_cache, bytenr,
141                                    bytenr + STRIPE_LEN - 1, 0);
142         }
143
144         clear_extent_dirty(&root->fs_info->free_space_cache,
145                            0, BTRFS_SUPER_INFO_OFFSET - 1, 0);
146
147         return 0;
148 }
149
150 static int custom_alloc_extent(struct btrfs_root *root, u64 num_bytes,
151                                u64 hint_byte, struct btrfs_key *ins)
152 {
153         u64 start;
154         u64 end;
155         u64 last = hint_byte;
156         int ret;
157         int wrapped = 0;
158         struct btrfs_block_group_cache *cache;
159
160         while(1) {
161                 ret = find_first_extent_bit(&root->fs_info->free_space_cache,
162                                             last, &start, &end, EXTENT_DIRTY);
163                 if (ret) {
164                         if (wrapped++ == 0) {
165                                 last = 0;
166                                 continue;
167                         } else {
168                                 goto fail;
169                         }
170                 }
171
172                 start = max(last, start);
173                 last = end + 1;
174                 if (last - start < num_bytes)
175                         continue;
176
177                 last = start + num_bytes;
178                 if (test_range_bit(&root->fs_info->pinned_extents,
179                                    start, last - 1, EXTENT_DIRTY, 0))
180                         continue;
181
182                 cache = btrfs_lookup_block_group(root->fs_info, start);
183                 BUG_ON(!cache);
184                 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM ||
185                     last > cache->key.objectid + cache->key.offset) {
186                         last = cache->key.objectid + cache->key.offset;
187                         continue;
188                 }
189
190                 clear_extent_dirty(&root->fs_info->free_space_cache,
191                                    start, start + num_bytes - 1, 0);
192
193                 ins->objectid = start;
194                 ins->offset = num_bytes;
195                 ins->type = BTRFS_EXTENT_ITEM_KEY;
196                 return 0;
197         }
198 fail:
199         fprintf(stderr, "not enough free space\n");
200         return -ENOSPC;
201 }
202
203 static int intersect_with_sb(u64 bytenr, u64 num_bytes)
204 {
205         int i;
206         u64 offset;
207
208         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
209                 offset = btrfs_sb_offset(i);
210                 offset &= ~((u64)STRIPE_LEN - 1);
211
212                 if (bytenr < offset + STRIPE_LEN &&
213                     bytenr + num_bytes > offset)
214                         return 1;
215         }
216         return 0;
217 }
218
219 static int custom_free_extent(struct btrfs_root *root, u64 bytenr,
220                               u64 num_bytes)
221 {
222         return intersect_with_sb(bytenr, num_bytes);
223 }
224
225 static struct btrfs_extent_ops extent_ops = {
226         .alloc_extent = custom_alloc_extent,
227         .free_extent = custom_free_extent,
228 };
229
230 struct dir_iterate_data {
231         struct btrfs_trans_handle *trans;
232         struct btrfs_root *root;
233         struct btrfs_inode_item *inode;
234         u64 objectid;
235         u64 index_cnt;
236         u64 parent;
237         int errcode;
238 };
239
240 static u8 filetype_conversion_table[EXT2_FT_MAX] = {
241         [EXT2_FT_UNKNOWN]       = BTRFS_FT_UNKNOWN,
242         [EXT2_FT_REG_FILE]      = BTRFS_FT_REG_FILE,
243         [EXT2_FT_DIR]           = BTRFS_FT_DIR,
244         [EXT2_FT_CHRDEV]        = BTRFS_FT_CHRDEV,
245         [EXT2_FT_BLKDEV]        = BTRFS_FT_BLKDEV,
246         [EXT2_FT_FIFO]          = BTRFS_FT_FIFO,
247         [EXT2_FT_SOCK]          = BTRFS_FT_SOCK,
248         [EXT2_FT_SYMLINK]       = BTRFS_FT_SYMLINK,
249 };
250
251 static int dir_iterate_proc(ext2_ino_t dir, int entry,
252                             struct ext2_dir_entry *dirent,
253                             int offset, int blocksize,
254                             char *buf,void *priv_data)
255 {
256         int ret;
257         int file_type;
258         u64 objectid;
259         u64 inode_size;
260         char dotdot[] = "..";
261         struct btrfs_key location;
262         struct dir_iterate_data *idata = (struct dir_iterate_data *)priv_data;
263         int name_len;
264
265         name_len = dirent->name_len & 0xFF;
266
267         objectid = dirent->inode + INO_OFFSET;
268         if (!strncmp(dirent->name, dotdot, name_len)) {
269                 if (name_len == 2) {
270                         BUG_ON(idata->parent != 0);
271                         idata->parent = objectid;
272                 }
273                 return 0;
274         }
275         if (dirent->inode < EXT2_GOOD_OLD_FIRST_INO)
276                 return 0;
277
278         location.objectid = objectid;
279         location.offset = 0;
280         btrfs_set_key_type(&location, BTRFS_INODE_ITEM_KEY);
281
282         file_type = dirent->name_len >> 8;
283         BUG_ON(file_type > EXT2_FT_SYMLINK);
284         ret = btrfs_insert_dir_item(idata->trans, idata->root,
285                                     dirent->name, name_len,
286                                     idata->objectid, &location,
287                                     filetype_conversion_table[file_type],
288                                     idata->index_cnt);
289         if (ret)
290                 goto fail;
291         ret = btrfs_insert_inode_ref(idata->trans, idata->root,
292                                      dirent->name, name_len,
293                                      objectid, idata->objectid,
294                                      idata->index_cnt);
295         if (ret)
296                 goto fail;
297         idata->index_cnt++;
298         inode_size = btrfs_stack_inode_size(idata->inode) +
299                      name_len * 2;
300         btrfs_set_stack_inode_size(idata->inode, inode_size);
301         return 0;
302 fail:
303         idata->errcode = ret;
304         return BLOCK_ABORT;
305 }
306
307 static int create_dir_entries(struct btrfs_trans_handle *trans,
308                               struct btrfs_root *root, u64 objectid,
309                               struct btrfs_inode_item *btrfs_inode,
310                               ext2_filsys ext2_fs, ext2_ino_t ext2_ino)
311 {
312         int ret;
313         errcode_t err;
314         struct dir_iterate_data data = {
315                 .trans          = trans,
316                 .root           = root,
317                 .inode          = btrfs_inode,
318                 .objectid       = objectid,
319                 .index_cnt      = 2,
320                 .parent         = 0,
321                 .errcode        = 0,
322         };
323
324         err = ext2fs_dir_iterate2(ext2_fs, ext2_ino, 0, NULL,
325                                   dir_iterate_proc, &data);
326         if (err)
327                 goto error;
328         ret = data.errcode;
329         if (ret == 0 && data.parent == objectid) {
330                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
331                                              objectid, objectid, 0);
332         }
333         return ret;
334 error:
335         fprintf(stderr, "ext2fs_dir_iterate2: %s\n", error_message(err));
336         return -1;
337 }
338
339 static int read_disk_extent(struct btrfs_root *root, u64 bytenr,
340                             u32 num_bytes, char *buffer)
341 {
342         int ret;
343         struct btrfs_fs_devices *fs_devs = root->fs_info->fs_devices;
344
345         ret = pread(fs_devs->latest_bdev, buffer, num_bytes, bytenr);
346         if (ret != num_bytes)
347                 goto fail;
348         ret = 0;
349 fail:
350         if (ret > 0)
351                 ret = -1;
352         return ret;
353 }
354 /*
355  * Record a file extent. Do all the required works, such as inserting
356  * file extent item, inserting extent item and backref item into extent
357  * tree and updating block accounting.
358  */
359 static int record_file_extent(struct btrfs_trans_handle *trans,
360                               struct btrfs_root *root, u64 objectid,
361                               struct btrfs_inode_item *inode,
362                               u64 file_pos, u64 disk_bytenr,
363                               u64 num_bytes, int checksum)
364 {
365         int ret;
366         struct btrfs_fs_info *info = root->fs_info;
367         struct btrfs_root *extent_root = info->extent_root;
368         struct extent_buffer *leaf;
369         struct btrfs_file_extent_item *fi;
370         struct btrfs_key ins_key;
371         struct btrfs_path path;
372         struct btrfs_extent_item *ei;
373         u32 blocksize = root->sectorsize;
374         u64 nbytes;
375
376         if (disk_bytenr == 0) {
377                 ret = btrfs_insert_file_extent(trans, root, objectid,
378                                                 file_pos, disk_bytenr,
379                                                 num_bytes, num_bytes);
380                 return ret;
381         }
382
383         btrfs_init_path(&path);
384
385         if (checksum) {
386                 u64 offset;
387                 char *buffer;
388
389                 ret = -ENOMEM;
390                 buffer = malloc(blocksize);
391                 if (!buffer)
392                         goto fail;
393                 for (offset = 0; offset < num_bytes; offset += blocksize) {
394                         ret = read_disk_extent(root, disk_bytenr + offset,
395                                                 blocksize, buffer);
396                         if (ret)
397                                 break;
398                         ret = btrfs_csum_file_block(trans,
399                                                     root->fs_info->csum_root,
400                                                     disk_bytenr + num_bytes,
401                                                     disk_bytenr + offset,
402                                                     buffer, blocksize);
403                         if (ret)
404                                 break;
405                 }
406                 free(buffer);
407                 if (ret)
408                         goto fail;
409         }
410
411         ins_key.objectid = objectid;
412         ins_key.offset = file_pos;
413         btrfs_set_key_type(&ins_key, BTRFS_EXTENT_DATA_KEY);
414         ret = btrfs_insert_empty_item(trans, root, &path, &ins_key,
415                                       sizeof(*fi));
416         if (ret)
417                 goto fail;
418         leaf = path.nodes[0];
419         fi = btrfs_item_ptr(leaf, path.slots[0],
420                             struct btrfs_file_extent_item);
421         btrfs_set_file_extent_generation(leaf, fi, trans->transid);
422         btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
423         btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr);
424         btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes);
425         btrfs_set_file_extent_offset(leaf, fi, 0);
426         btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
427         btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
428         btrfs_set_file_extent_compression(leaf, fi, 0);
429         btrfs_set_file_extent_encryption(leaf, fi, 0);
430         btrfs_set_file_extent_other_encoding(leaf, fi, 0);
431         btrfs_mark_buffer_dirty(leaf);
432
433         nbytes = btrfs_stack_inode_nbytes(inode) + num_bytes;
434         btrfs_set_stack_inode_nbytes(inode, nbytes);
435
436         btrfs_release_path(&path);
437
438         ins_key.objectid = disk_bytenr;
439         ins_key.offset = num_bytes;
440         ins_key.type = BTRFS_EXTENT_ITEM_KEY;
441
442         ret = btrfs_insert_empty_item(trans, extent_root, &path,
443                                       &ins_key, sizeof(*ei));
444         if (ret == 0) {
445                 leaf = path.nodes[0];
446                 ei = btrfs_item_ptr(leaf, path.slots[0],
447                                     struct btrfs_extent_item);
448
449                 btrfs_set_extent_refs(leaf, ei, 0);
450                 btrfs_set_extent_generation(leaf, ei, 0);
451                 btrfs_set_extent_flags(leaf, ei, BTRFS_EXTENT_FLAG_DATA);
452
453                 btrfs_mark_buffer_dirty(leaf);
454
455                 ret = btrfs_update_block_group(trans, root, disk_bytenr,
456                                                num_bytes, 1, 0);
457                 if (ret)
458                         goto fail;
459         } else if (ret != -EEXIST) {
460                 goto fail;
461         }
462         btrfs_extent_post_op(trans, extent_root);
463
464         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, 0,
465                                    root->root_key.objectid,
466                                    objectid, file_pos);
467         if (ret)
468                 goto fail;
469         ret = 0;
470 fail:
471         btrfs_release_path(&path);
472         return ret;
473 }
474
475 static int record_file_blocks(struct btrfs_trans_handle *trans,
476                               struct btrfs_root *root, u64 objectid,
477                               struct btrfs_inode_item *inode,
478                               u64 file_block, u64 disk_block,
479                               u64 num_blocks, int checksum)
480 {
481         u64 file_pos = file_block * root->sectorsize;
482         u64 disk_bytenr = disk_block * root->sectorsize;
483         u64 num_bytes = num_blocks * root->sectorsize;
484         return record_file_extent(trans, root, objectid, inode, file_pos,
485                                   disk_bytenr, num_bytes, checksum);
486 }
487
488 struct blk_iterate_data {
489         struct btrfs_trans_handle *trans;
490         struct btrfs_root *root;
491         struct btrfs_inode_item *inode;
492         u64 objectid;
493         u64 first_block;
494         u64 disk_block;
495         u64 num_blocks;
496         u64 boundary;
497         int checksum;
498         int errcode;
499 };
500
501 static int block_iterate_proc(ext2_filsys ext2_fs,
502                               u64 disk_block, u64 file_block,
503                               struct blk_iterate_data *idata)
504 {
505         int ret;
506         int sb_region;
507         int do_barrier;
508         struct btrfs_root *root = idata->root;
509         struct btrfs_trans_handle *trans = idata->trans;
510         struct btrfs_block_group_cache *cache;
511         u64 bytenr = disk_block * root->sectorsize;
512
513         sb_region = intersect_with_sb(bytenr, root->sectorsize);
514         do_barrier = sb_region || disk_block >= idata->boundary;
515         if ((idata->num_blocks > 0 && do_barrier) ||
516             (file_block > idata->first_block + idata->num_blocks) ||
517             (disk_block != idata->disk_block + idata->num_blocks)) {
518                 if (idata->num_blocks > 0) {
519                         ret = record_file_blocks(trans, root, idata->objectid,
520                                         idata->inode, idata->first_block,
521                                         idata->disk_block, idata->num_blocks,
522                                         idata->checksum);
523                         if (ret)
524                                 goto fail;
525                         idata->first_block += idata->num_blocks;
526                         idata->num_blocks = 0;
527                 }
528                 if (file_block > idata->first_block) {
529                         ret = record_file_blocks(trans, root, idata->objectid,
530                                         idata->inode, idata->first_block,
531                                         0, file_block - idata->first_block,
532                                         idata->checksum);
533                         if (ret)
534                                 goto fail;
535                 }
536
537                 if (sb_region) {
538                         bytenr += STRIPE_LEN - 1;
539                         bytenr &= ~((u64)STRIPE_LEN - 1);
540                 } else {
541                         cache = btrfs_lookup_block_group(root->fs_info, bytenr);
542                         BUG_ON(!cache);
543                         bytenr = cache->key.objectid + cache->key.offset;
544                 }
545
546                 idata->first_block = file_block;
547                 idata->disk_block = disk_block;
548                 idata->boundary = bytenr / root->sectorsize;
549         }
550         idata->num_blocks++;
551         return 0;
552 fail:
553         idata->errcode = ret;
554         return BLOCK_ABORT;
555 }
556
557 static int __block_iterate_proc(ext2_filsys fs, blk_t *blocknr,
558                                 e2_blkcnt_t blockcnt, blk_t ref_block,
559                                 int ref_offset, void *priv_data)
560 {
561         struct blk_iterate_data *idata;
562         idata = (struct blk_iterate_data *)priv_data;
563         return block_iterate_proc(fs, *blocknr, blockcnt, idata);
564 }
565
566 /*
567  * traverse file's data blocks, record these data blocks as file extents.
568  */
569 static int create_file_extents(struct btrfs_trans_handle *trans,
570                                struct btrfs_root *root, u64 objectid,
571                                struct btrfs_inode_item *btrfs_inode,
572                                ext2_filsys ext2_fs, ext2_ino_t ext2_ino,
573                                int datacsum, int packing)
574 {
575         int ret;
576         char *buffer = NULL;
577         errcode_t err;
578         u32 last_block;
579         u32 sectorsize = root->sectorsize;
580         u64 inode_size = btrfs_stack_inode_size(btrfs_inode);
581         struct blk_iterate_data data = {
582                 .trans          = trans,
583                 .root           = root,
584                 .inode          = btrfs_inode,
585                 .objectid       = objectid,
586                 .first_block    = 0,
587                 .disk_block     = 0,
588                 .num_blocks     = 0,
589                 .boundary       = (u64)-1,
590                 .checksum       = datacsum,
591                 .errcode        = 0,
592         };
593         err = ext2fs_block_iterate2(ext2_fs, ext2_ino, BLOCK_FLAG_DATA_ONLY,
594                                     NULL, __block_iterate_proc, &data);
595         if (err)
596                 goto error;
597         ret = data.errcode;
598         if (ret)
599                 goto fail;
600         if (packing && data.first_block == 0 && data.num_blocks > 0 &&
601             inode_size <= BTRFS_MAX_INLINE_DATA_SIZE(root)) {
602                 u64 num_bytes = data.num_blocks * sectorsize;
603                 u64 disk_bytenr = data.disk_block * sectorsize;
604                 u64 nbytes;
605
606                 buffer = malloc(num_bytes);
607                 if (!buffer)
608                         return -ENOMEM;
609                 ret = read_disk_extent(root, disk_bytenr, num_bytes, buffer);
610                 if (ret)
611                         goto fail;
612                 if (num_bytes > inode_size)
613                         num_bytes = inode_size;
614                 ret = btrfs_insert_inline_extent(trans, root, objectid,
615                                                  0, buffer, num_bytes);
616                 if (ret)
617                         goto fail;
618                 nbytes = btrfs_stack_inode_nbytes(btrfs_inode) + num_bytes;
619                 btrfs_set_stack_inode_nbytes(btrfs_inode, nbytes);
620         } else if (data.num_blocks > 0) {
621                 ret = record_file_blocks(trans, root, objectid, btrfs_inode,
622                                          data.first_block, data.disk_block,
623                                          data.num_blocks, data.checksum);
624                 if (ret)
625                         goto fail;
626         }
627         data.first_block += data.num_blocks;
628         last_block = (inode_size + sectorsize - 1) / sectorsize;
629         if (last_block > data.first_block) {
630                 ret = record_file_blocks(trans, root, objectid, btrfs_inode,
631                                          data.first_block, 0, last_block -
632                                          data.first_block, data.checksum);
633         }
634 fail:
635         if (buffer)
636                 free(buffer);
637         return ret;
638 error:
639         fprintf(stderr, "ext2fs_block_iterate2: %s\n", error_message(err));
640         return -1;
641 }
642
643 static int create_symbol_link(struct btrfs_trans_handle *trans,
644                               struct btrfs_root *root, u64 objectid,
645                               struct btrfs_inode_item *btrfs_inode,
646                               ext2_filsys ext2_fs, ext2_ino_t ext2_ino,
647                               struct ext2_inode *ext2_inode)
648 {
649         int ret;
650         char *pathname;
651         u64 inode_size = btrfs_stack_inode_size(btrfs_inode);
652         if (ext2fs_inode_data_blocks(ext2_fs, ext2_inode)) {
653                 btrfs_set_stack_inode_size(btrfs_inode, inode_size + 1);
654                 ret = create_file_extents(trans, root, objectid, btrfs_inode,
655                                           ext2_fs, ext2_ino, 1, 1);
656                 btrfs_set_stack_inode_size(btrfs_inode, inode_size);
657                 return ret;
658         }
659
660         pathname = (char *)&(ext2_inode->i_block[0]);
661         BUG_ON(pathname[inode_size] != 0);
662         ret = btrfs_insert_inline_extent(trans, root, objectid, 0,
663                                          pathname, inode_size + 1);
664         btrfs_set_stack_inode_nbytes(btrfs_inode, inode_size + 1);
665         return ret;
666 }
667
/*
 * The following xattr/ACL related code is based on the code in
 * fs/ext3/xattr.c and fs/ext3/acl.c
 */

/* header at the start of an xattr block, and the first entry after it */
#define EXT2_XATTR_BHDR(ptr) ((struct ext2_ext_attr_header *)(ptr))
#define EXT2_XATTR_BFIRST(ptr) \
	((struct ext2_ext_attr_entry *)(EXT2_XATTR_BHDR(ptr) + 1))
/*
 * in-inode xattr header, located EXT2_GOOD_OLD_INODE_SIZE plus
 * i_extra_isize bytes into the inode, and the first entry, which
 * follows the h_magic field
 */
#define EXT2_XATTR_IHDR(inode) \
	((struct ext2_ext_attr_header *) ((char *)(inode) + \
		EXT2_GOOD_OLD_INODE_SIZE + (inode)->i_extra_isize))
#define EXT2_XATTR_IFIRST(inode) \
	((struct ext2_ext_attr_entry *) ((char *)EXT2_XATTR_IHDR(inode) + \
		sizeof(EXT2_XATTR_IHDR(inode)->h_magic)))
681
/*
 * Walk the xattr entry list starting at 'entry' until the last-entry
 * marker.  Returns -EIO if a successor entry would start at or beyond
 * 'end', 0 otherwise.
 */
static int ext2_xattr_check_names(struct ext2_ext_attr_entry *entry,
				  const void *end)
{
	struct ext2_ext_attr_entry *next;

	for (; !EXT2_EXT_IS_LAST_ENTRY(entry); entry = next) {
		next = EXT2_EXT_ATTR_NEXT(entry);
		if ((void *)next >= end)
			return -EIO;
	}
	return 0;
}
695
696 static int ext2_xattr_check_block(const char *buf, size_t size)
697 {
698         int error;
699         struct ext2_ext_attr_header *header = EXT2_XATTR_BHDR(buf);
700
701         if (header->h_magic != EXT2_EXT_ATTR_MAGIC ||
702             header->h_blocks != 1)
703                 return -EIO;
704         error = ext2_xattr_check_names(EXT2_XATTR_BFIRST(buf), buf + size);
705         return error;
706 }
707
708 static int ext2_xattr_check_entry(struct ext2_ext_attr_entry *entry,
709                                   size_t size)
710 {
711         size_t value_size = entry->e_value_size;
712
713         if (entry->e_value_block != 0 || value_size > size ||
714             entry->e_value_offs + value_size > size)
715                 return -EIO;
716         return 0;
717 }
718
719 #define EXT2_ACL_VERSION        0x0001
720
721 typedef struct {
722         __le16          e_tag;
723         __le16          e_perm;
724         __le32          e_id;
725 } ext2_acl_entry;
726
727 typedef struct {
728         __le16          e_tag;
729         __le16          e_perm;
730 } ext2_acl_entry_short;
731
732 typedef struct {
733         __le32          a_version;
734 } ext2_acl_header;
735
736 static inline int ext2_acl_count(size_t size)
737 {
738         ssize_t s;
739         size -= sizeof(ext2_acl_header);
740         s = size - 4 * sizeof(ext2_acl_entry_short);
741         if (s < 0) {
742                 if (size % sizeof(ext2_acl_entry_short))
743                         return -1;
744                 return size / sizeof(ext2_acl_entry_short);
745         } else {
746                 if (s % sizeof(ext2_acl_entry))
747                         return -1;
748                 return s / sizeof(ext2_acl_entry) + 4;
749         }
750 }
751
752 #define ACL_EA_VERSION          0x0002
753
754 typedef struct {
755         __le16          e_tag;
756         __le16          e_perm;
757         __le32          e_id;
758 } acl_ea_entry;
759
760 typedef struct {
761         __le32          a_version;
762         acl_ea_entry    a_entries[0];
763 } acl_ea_header;
764
765 static inline size_t acl_ea_size(int count)
766 {
767         return sizeof(acl_ea_header) + count * sizeof(acl_ea_entry);
768 }
769
770 static int ext2_acl_to_xattr(void *dst, const void *src,
771                              size_t dst_size, size_t src_size)
772 {
773         int i, count;
774         const void *end = src + src_size;
775         acl_ea_header *ext_acl = (acl_ea_header *)dst;
776         acl_ea_entry *dst_entry = ext_acl->a_entries;
777         ext2_acl_entry *src_entry;
778
779         if (src_size < sizeof(ext2_acl_header))
780                 goto fail;
781         if (((ext2_acl_header *)src)->a_version !=
782             cpu_to_le32(EXT2_ACL_VERSION))
783                 goto fail;
784         src += sizeof(ext2_acl_header);
785         count = ext2_acl_count(src_size);
786         if (count <= 0)
787                 goto fail;
788
789         BUG_ON(dst_size < acl_ea_size(count));
790         ext_acl->a_version = cpu_to_le32(ACL_EA_VERSION);
791         for (i = 0; i < count; i++, dst_entry++) {
792                 src_entry = (ext2_acl_entry *)src;
793                 if (src + sizeof(ext2_acl_entry_short) > end)
794                         goto fail;
795                 dst_entry->e_tag = src_entry->e_tag;
796                 dst_entry->e_perm = src_entry->e_perm;
797                 switch (le16_to_cpu(src_entry->e_tag)) {
798                 case ACL_USER_OBJ:
799                 case ACL_GROUP_OBJ:
800                 case ACL_MASK:
801                 case ACL_OTHER:
802                         src += sizeof(ext2_acl_entry_short);
803                         dst_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID);
804                         break;
805                 case ACL_USER:
806                 case ACL_GROUP:
807                         src += sizeof(ext2_acl_entry);
808                         if (src > end)
809                                 goto fail;
810                         dst_entry->e_id = src_entry->e_id;
811                         break;
812                 default:
813                         goto fail;
814                 }
815         }
816         if (src != end)
817                 goto fail;
818         return 0;
819 fail:
820         return -EINVAL;
821 }
822
/*
 * Maps an ext2 xattr name index (e_name_index) to the xattr name prefix
 * used on the btrfs side.  Indexes without an entry are NULL and are
 * rejected by copy_single_xattr().  The table is read-only.
 */
static const char * const xattr_prefix_table[] = {
        [1] =   "user.",
        [2] =   "system.posix_acl_access",
        [3] =   "system.posix_acl_default",
        [4] =   "trusted.",
        [6] =   "security.",
};
830
831 static int copy_single_xattr(struct btrfs_trans_handle *trans,
832                              struct btrfs_root *root, u64 objectid,
833                              struct ext2_ext_attr_entry *entry,
834                              const void *data, u32 datalen)
835 {
836         int ret = 0;
837         int name_len;
838         int name_index;
839         void *databuf = NULL;
840         char namebuf[XATTR_NAME_MAX + 1];
841
842         name_index = entry->e_name_index;
843         if (name_index >= ARRAY_SIZE(xattr_prefix_table) ||
844             xattr_prefix_table[name_index] == NULL)
845                 return -EOPNOTSUPP;
846         name_len = strlen(xattr_prefix_table[name_index]) +
847                    entry->e_name_len;
848         if (name_len >= sizeof(namebuf))
849                 return -ERANGE;
850
851         if (name_index == 2 || name_index == 3) {
852                 size_t bufsize = acl_ea_size(ext2_acl_count(datalen));
853                 databuf = malloc(bufsize);
854                 if (!databuf)
855                        return -ENOMEM;
856                 ret = ext2_acl_to_xattr(databuf, data, bufsize, datalen);
857                 if (ret)
858                         goto out;
859                 data = databuf;
860                 datalen = bufsize;
861         }
862         strncpy(namebuf, xattr_prefix_table[name_index], XATTR_NAME_MAX);
863         strncat(namebuf, EXT2_EXT_ATTR_NAME(entry), entry->e_name_len);
864         if (name_len + datalen > BTRFS_LEAF_DATA_SIZE(root) -
865             sizeof(struct btrfs_item) - sizeof(struct btrfs_dir_item)) {
866                 fprintf(stderr, "skip large xattr on inode %Lu name %.*s\n",
867                         objectid - INO_OFFSET, name_len, namebuf);
868                 goto out;
869         }
870         ret = btrfs_insert_xattr_item(trans, root, namebuf, name_len,
871                                       data, datalen, objectid);
872 out:
873         if (databuf)
874                 free(databuf);
875         return ret;
876 }
877
878 static int copy_extended_attrs(struct btrfs_trans_handle *trans,
879                                struct btrfs_root *root, u64 objectid,
880                                struct btrfs_inode_item *btrfs_inode,
881                                ext2_filsys ext2_fs, ext2_ino_t ext2_ino)
882 {
883         int ret = 0;
884         int inline_ea = 0;
885         errcode_t err;
886         u32 datalen;
887         u32 block_size = ext2_fs->blocksize;
888         u32 inode_size = EXT2_INODE_SIZE(ext2_fs->super);
889         struct ext2_inode_large *ext2_inode;
890         struct ext2_ext_attr_entry *entry;
891         void *data;
892         char *buffer = NULL;
893         char inode_buf[EXT2_GOOD_OLD_INODE_SIZE];
894
895         if (inode_size <= EXT2_GOOD_OLD_INODE_SIZE) {
896                 ext2_inode = (struct ext2_inode_large *)inode_buf;
897         } else {
898                 ext2_inode = (struct ext2_inode_large *)malloc(inode_size);
899                 if (!ext2_inode)
900                        return -ENOMEM;
901         }
902         err = ext2fs_read_inode_full(ext2_fs, ext2_ino, (void *)ext2_inode,
903                                      inode_size);
904         if (err) {
905                 fprintf(stderr, "ext2fs_read_inode_full: %s\n",
906                         error_message(err));
907                 ret = -1;
908                 goto out;
909         }
910
911         if (ext2_ino > ext2_fs->super->s_first_ino &&
912             inode_size > EXT2_GOOD_OLD_INODE_SIZE) {
913                 if (EXT2_GOOD_OLD_INODE_SIZE +
914                     ext2_inode->i_extra_isize > inode_size) {
915                         ret = -EIO;
916                         goto out;
917                 }
918                 if (ext2_inode->i_extra_isize != 0 &&
919                     EXT2_XATTR_IHDR(ext2_inode)->h_magic ==
920                     EXT2_EXT_ATTR_MAGIC) {
921                         inline_ea = 1;
922                 }
923         }
924         if (inline_ea) {
925                 int total;
926                 void *end = (void *)ext2_inode + inode_size;
927                 entry = EXT2_XATTR_IFIRST(ext2_inode);
928                 total = end - (void *)entry;
929                 ret = ext2_xattr_check_names(entry, end);
930                 if (ret)
931                         goto out;
932                 while (!EXT2_EXT_IS_LAST_ENTRY(entry)) {
933                         ret = ext2_xattr_check_entry(entry, total);
934                         if (ret)
935                                 goto out;
936                         data = (void *)EXT2_XATTR_IFIRST(ext2_inode) +
937                                 entry->e_value_offs;
938                         datalen = entry->e_value_size;
939                         ret = copy_single_xattr(trans, root, objectid,
940                                                 entry, data, datalen);
941                         if (ret)
942                                 goto out;
943                         entry = EXT2_EXT_ATTR_NEXT(entry);
944                 }
945         }
946
947         if (ext2_inode->i_file_acl == 0)
948                 goto out;
949
950         buffer = malloc(block_size);
951         if (!buffer) {
952                 ret = -ENOMEM;
953                 goto out;
954         }
955         err = ext2fs_read_ext_attr(ext2_fs, ext2_inode->i_file_acl, buffer);
956         if (err) {
957                 fprintf(stderr, "ext2fs_read_ext_attr: %s\n",
958                         error_message(err));
959                 ret = -1;
960                 goto out;
961         }
962         ret = ext2_xattr_check_block(buffer, block_size);
963         if (ret)
964                 goto out;
965
966         entry = EXT2_XATTR_BFIRST(buffer);
967         while (!EXT2_EXT_IS_LAST_ENTRY(entry)) {
968                 ret = ext2_xattr_check_entry(entry, block_size);
969                 if (ret)
970                         goto out;
971                 data = buffer + entry->e_value_offs;
972                 datalen = entry->e_value_size;
973                 ret = copy_single_xattr(trans, root, objectid,
974                                         entry, data, datalen);
975                 if (ret)
976                         goto out;
977                 entry = EXT2_EXT_ATTR_NEXT(entry);
978         }
979 out:
980         if (buffer != NULL)
981                 free(buffer);
982         if ((void *)ext2_inode != inode_buf)
983                 free(ext2_inode);
984         return ret;
985 }
986 #define MINORBITS       20
987 #define MKDEV(ma, mi)   (((ma) << MINORBITS) | (mi))
988
989 static inline dev_t old_decode_dev(u16 val)
990 {
991         return MKDEV((val >> 8) & 255, val & 255);
992 }
993
994 static inline dev_t new_decode_dev(u32 dev)
995 {
996         unsigned major = (dev & 0xfff00) >> 8;
997         unsigned minor = (dev & 0xff) | ((dev >> 12) & 0xfff00);
998         return MKDEV(major, minor);
999 }
1000
1001 static int copy_inode_item(struct btrfs_inode_item *dst,
1002                            struct ext2_inode *src, u32 blocksize)
1003 {
1004         btrfs_set_stack_inode_generation(dst, 1);
1005         btrfs_set_stack_inode_size(dst, src->i_size);
1006         btrfs_set_stack_inode_nbytes(dst, 0);
1007         btrfs_set_stack_inode_block_group(dst, 0);
1008         btrfs_set_stack_inode_nlink(dst, src->i_links_count);
1009         btrfs_set_stack_inode_uid(dst, src->i_uid | (src->i_uid_high << 16));
1010         btrfs_set_stack_inode_gid(dst, src->i_gid | (src->i_gid_high << 16));
1011         btrfs_set_stack_inode_mode(dst, src->i_mode);
1012         btrfs_set_stack_inode_rdev(dst, 0);
1013         btrfs_set_stack_inode_flags(dst, 0);
1014         btrfs_set_stack_timespec_sec(&dst->atime, src->i_atime);
1015         btrfs_set_stack_timespec_nsec(&dst->atime, 0);
1016         btrfs_set_stack_timespec_sec(&dst->ctime, src->i_ctime);
1017         btrfs_set_stack_timespec_nsec(&dst->ctime, 0);
1018         btrfs_set_stack_timespec_sec(&dst->mtime, src->i_mtime);
1019         btrfs_set_stack_timespec_nsec(&dst->mtime, 0);
1020         btrfs_set_stack_timespec_sec(&dst->otime, 0);
1021         btrfs_set_stack_timespec_nsec(&dst->otime, 0);
1022
1023         if (S_ISDIR(src->i_mode)) {
1024                 btrfs_set_stack_inode_size(dst, 0);
1025                 btrfs_set_stack_inode_nlink(dst, 1);
1026         }
1027         if (S_ISREG(src->i_mode)) {
1028                 btrfs_set_stack_inode_size(dst, (u64)src->i_size_high << 32 |
1029                                            (u64)src->i_size);
1030         }
1031         if (!S_ISREG(src->i_mode) && !S_ISDIR(src->i_mode) &&
1032             !S_ISLNK(src->i_mode)) {
1033                 if (src->i_block[0]) {
1034                         btrfs_set_stack_inode_rdev(dst,
1035                                 old_decode_dev(src->i_block[0]));
1036                 } else {
1037                         btrfs_set_stack_inode_rdev(dst,
1038                                 new_decode_dev(src->i_block[1]));
1039                 }
1040         }
1041         return 0;
1042 }
1043
1044 /*
1045  * copy a single inode. do all the required works, such as cloning
1046  * inode item, creating file extents and creating directory entries.
1047  */
1048 static int copy_single_inode(struct btrfs_trans_handle *trans,
1049                              struct btrfs_root *root, u64 objectid,
1050                              ext2_filsys ext2_fs, ext2_ino_t ext2_ino,
1051                              struct ext2_inode *ext2_inode,
1052                              int datacsum, int packing, int noxattr)
1053 {
1054         int ret;
1055         struct btrfs_key inode_key;
1056         struct btrfs_inode_item btrfs_inode;
1057
1058         if (ext2_inode->i_links_count == 0)
1059                 return 0;
1060
1061         copy_inode_item(&btrfs_inode, ext2_inode, ext2_fs->blocksize);
1062         if (!datacsum && S_ISREG(ext2_inode->i_mode)) {
1063                 u32 flags = btrfs_stack_inode_flags(&btrfs_inode) |
1064                             BTRFS_INODE_NODATASUM;
1065                 btrfs_set_stack_inode_flags(&btrfs_inode, flags);
1066         }
1067
1068         switch (ext2_inode->i_mode & S_IFMT) {
1069         case S_IFREG:
1070                 ret = create_file_extents(trans, root, objectid, &btrfs_inode,
1071                                         ext2_fs, ext2_ino, datacsum, packing);
1072                 break;
1073         case S_IFDIR:
1074                 ret = create_dir_entries(trans, root, objectid, &btrfs_inode,
1075                                          ext2_fs, ext2_ino);
1076                 break;
1077         case S_IFLNK:
1078                 ret = create_symbol_link(trans, root, objectid, &btrfs_inode,
1079                                          ext2_fs, ext2_ino, ext2_inode);
1080                 break;
1081         default:
1082                 ret = 0;
1083                 break;
1084         }
1085         if (ret)
1086                 return ret;
1087
1088         if (!noxattr) {
1089                 ret = copy_extended_attrs(trans, root, objectid, &btrfs_inode,
1090                                           ext2_fs, ext2_ino);
1091                 if (ret)
1092                         return ret;
1093         }
1094         inode_key.objectid = objectid;
1095         inode_key.offset = 0;
1096         btrfs_set_key_type(&inode_key, BTRFS_INODE_ITEM_KEY);
1097         ret = btrfs_insert_inode(trans, root, objectid, &btrfs_inode);
1098         return ret;
1099 }
1100
1101 static int copy_disk_extent(struct btrfs_root *root, u64 dst_bytenr,
1102                             u64 src_bytenr, u32 num_bytes)
1103 {
1104         int ret;
1105         char *buffer;
1106         struct btrfs_fs_devices *fs_devs = root->fs_info->fs_devices;
1107
1108         buffer = malloc(num_bytes);
1109         if (!buffer)
1110                 return -ENOMEM;
1111         ret = pread(fs_devs->latest_bdev, buffer, num_bytes, src_bytenr);
1112         if (ret != num_bytes)
1113                 goto fail;
1114         ret = pwrite(fs_devs->latest_bdev, buffer, num_bytes, dst_bytenr);
1115         if (ret != num_bytes)
1116                 goto fail;
1117         ret = 0;
1118 fail:
1119         free(buffer);
1120         if (ret > 0)
1121                 ret = -1;
1122         return ret;
1123 }
1124 /*
1125  * scan ext2's inode bitmap and copy all used inodes.
1126  */
1127 static int copy_inodes(struct btrfs_root *root, ext2_filsys ext2_fs,
1128                        int datacsum, int packing, int noxattr)
1129 {
1130         int ret;
1131         errcode_t err;
1132         ext2_inode_scan ext2_scan;
1133         struct ext2_inode ext2_inode;
1134         ext2_ino_t ext2_ino;
1135         u64 objectid;
1136         struct btrfs_trans_handle *trans;
1137
1138         trans = btrfs_start_transaction(root, 1);
1139         if (!trans)
1140                 return -ENOMEM;
1141         err = ext2fs_open_inode_scan(ext2_fs, 0, &ext2_scan);
1142         if (err) {
1143                 fprintf(stderr, "ext2fs_open_inode_scan: %s\n", error_message(err));
1144                 return -1;
1145         }
1146         while (!(err = ext2fs_get_next_inode(ext2_scan, &ext2_ino,
1147                                              &ext2_inode))) {
1148                 /* no more inodes */
1149                 if (ext2_ino == 0)
1150                         break;
1151                 /* skip special inode in ext2fs */
1152                 if (ext2_ino < EXT2_GOOD_OLD_FIRST_INO &&
1153                     ext2_ino != EXT2_ROOT_INO)
1154                         continue;
1155                 objectid = ext2_ino + INO_OFFSET;
1156                 ret = copy_single_inode(trans, root,
1157                                         objectid, ext2_fs, ext2_ino,
1158                                         &ext2_inode, datacsum, packing,
1159                                         noxattr);
1160                 if (ret)
1161                         return ret;
1162                 if (trans->blocks_used >= 4096) {
1163                         ret = btrfs_commit_transaction(trans, root);
1164                         BUG_ON(ret);
1165                         trans = btrfs_start_transaction(root, 1);
1166                         BUG_ON(!trans);
1167                 }
1168         }
1169         if (err) {
1170                 fprintf(stderr, "ext2fs_get_next_inode: %s\n", error_message(err));
1171                 return -1;
1172         }
1173         ret = btrfs_commit_transaction(trans, root);
1174         BUG_ON(ret);
1175
1176         return ret;
1177 }
1178
1179 /*
1180  * Construct a range of ext2fs image file.
1181  * scan block allocation bitmap, find all blocks used by the ext2fs
1182  * in this range and create file extents that point to these blocks.
1183  *
1184  * Note: Before calling the function, no file extent points to blocks
1185  *       in this range
1186  */
1187 static int create_image_file_range(struct btrfs_trans_handle *trans,
1188                                    struct btrfs_root *root, u64 objectid,
1189                                    struct btrfs_inode_item *inode,
1190                                    u64 start_byte, u64 end_byte,
1191                                    ext2_filsys ext2_fs)
1192 {
1193         u32 blocksize = ext2_fs->blocksize;
1194         u32 block = start_byte / blocksize;
1195         u32 last_block = (end_byte + blocksize - 1) / blocksize;
1196         int ret = 0;
1197         struct blk_iterate_data data = {
1198                 .trans          = trans,
1199                 .root           = root,
1200                 .inode          = inode,
1201                 .objectid       = objectid,
1202                 .first_block    = block,
1203                 .disk_block     = 0,
1204                 .num_blocks     = 0,
1205                 .boundary       = (u64)-1,
1206                 .checksum       = 0,
1207                 .errcode        = 0,
1208         };
1209         for (; start_byte < end_byte; block++, start_byte += blocksize) {
1210                 if (!ext2fs_fast_test_block_bitmap(ext2_fs->block_map, block))
1211                         continue;
1212                 ret = block_iterate_proc(NULL, block, block, &data);
1213                 if (ret & BLOCK_ABORT) {
1214                         ret = data.errcode;
1215                         goto fail;
1216                 }
1217         }
1218         if (data.num_blocks > 0) {
1219                 ret = record_file_blocks(trans, root, objectid, inode,
1220                                          data.first_block, data.disk_block,
1221                                          data.num_blocks, 0);
1222                 if (ret)
1223                         goto fail;
1224                 data.first_block += data.num_blocks;
1225         }
1226         if (last_block > data.first_block) {
1227                 ret = record_file_blocks(trans, root, objectid, inode,
1228                                          data.first_block, 0, last_block -
1229                                          data.first_block, 0);
1230                 if (ret)
1231                         goto fail;
1232         }
1233 fail:
1234         return ret;
1235 }
1236 /*
1237  * Create the ext2fs image file.
1238  */
1239 static int create_ext2_image(struct btrfs_root *root, ext2_filsys ext2_fs,
1240                              const char *name)
1241 {
1242         int ret;
1243         struct btrfs_key key;
1244         struct btrfs_key location;
1245         struct btrfs_path path;
1246         struct btrfs_inode_item btrfs_inode;
1247         struct btrfs_inode_item *inode_item;
1248         struct extent_buffer *leaf;
1249         struct btrfs_fs_info *fs_info = root->fs_info;
1250         struct btrfs_root *extent_root = fs_info->extent_root;
1251         struct btrfs_trans_handle *trans;
1252         struct btrfs_extent_item *ei;
1253         struct btrfs_extent_inline_ref *iref;
1254         struct btrfs_extent_data_ref *dref;
1255         u64 bytenr;
1256         u64 num_bytes;
1257         u64 objectid;
1258         u64 last_byte;
1259         u64 first_free;
1260         u64 total_bytes;
1261         u32 sectorsize = root->sectorsize;
1262
1263         total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
1264         first_free =  BTRFS_SUPER_INFO_OFFSET + sectorsize * 2 - 1;
1265         first_free &= ~((u64)sectorsize - 1);
1266
1267         memset(&btrfs_inode, 0, sizeof(btrfs_inode));
1268         btrfs_set_stack_inode_generation(&btrfs_inode, 1);
1269         btrfs_set_stack_inode_size(&btrfs_inode, total_bytes);
1270         btrfs_set_stack_inode_nlink(&btrfs_inode, 1);
1271         btrfs_set_stack_inode_nbytes(&btrfs_inode, 0);
1272         btrfs_set_stack_inode_mode(&btrfs_inode, S_IFREG | 0400);
1273         btrfs_set_stack_inode_flags(&btrfs_inode, BTRFS_INODE_NODATASUM |
1274                                     BTRFS_INODE_READONLY);
1275         btrfs_init_path(&path);
1276         trans = btrfs_start_transaction(root, 1);
1277         BUG_ON(!trans);
1278
1279         objectid = btrfs_root_dirid(&root->root_item);
1280         ret = btrfs_find_free_objectid(trans, root, objectid, &objectid);
1281         if (ret)
1282                 goto fail;
1283
1284         /*
1285          * copy blocks covered by extent #0 to new positions. extent #0 is
1286          * special, we can't rely on relocate_extents_range to relocate it.
1287          */
1288         for (last_byte = 0; last_byte < first_free; last_byte += sectorsize) {
1289                 ret = custom_alloc_extent(root, sectorsize, 0, &key);
1290                 if (ret)
1291                         goto fail;
1292                 ret = copy_disk_extent(root, key.objectid, last_byte,
1293                                        sectorsize);
1294                 if (ret)
1295                         goto fail;
1296                 ret = record_file_extent(trans, root, objectid,
1297                                          &btrfs_inode, last_byte,
1298                                          key.objectid, sectorsize, 0);
1299                 if (ret)
1300                         goto fail;
1301         }
1302
1303         while(1) {
1304                 key.objectid = last_byte;
1305                 key.offset = 0;
1306                 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1307                 ret = btrfs_search_slot(trans, fs_info->extent_root,
1308                                         &key, &path, 0, 0);
1309                 if (ret < 0)
1310                         goto fail;
1311 next:
1312                 leaf = path.nodes[0];
1313                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1314                         ret = btrfs_next_leaf(extent_root, &path);
1315                         if (ret < 0)
1316                                 goto fail;
1317                         if (ret > 0)
1318                                 break;
1319                         leaf = path.nodes[0];
1320                 }
1321                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1322                 if (last_byte > key.objectid ||
1323                     key.type != BTRFS_EXTENT_ITEM_KEY) {
1324                         path.slots[0]++;
1325                         goto next;
1326                 }
1327
1328                 bytenr = key.objectid;
1329                 num_bytes = key.offset;
1330                 ei = btrfs_item_ptr(leaf, path.slots[0],
1331                                     struct btrfs_extent_item);
1332                 if (!(btrfs_extent_flags(leaf, ei) & BTRFS_EXTENT_FLAG_DATA)) {
1333                         path.slots[0]++;
1334                         goto next;
1335                 }
1336
1337                 BUG_ON(btrfs_item_size_nr(leaf, path.slots[0]) != sizeof(*ei) +
1338                        btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY));
1339
1340                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
1341                 key.type = btrfs_extent_inline_ref_type(leaf, iref);
1342                 BUG_ON(key.type != BTRFS_EXTENT_DATA_REF_KEY);
1343                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1344                 if (btrfs_extent_data_ref_root(leaf, dref) !=
1345                     BTRFS_FS_TREE_OBJECTID) {
1346                         path.slots[0]++;
1347                         goto next;
1348                 }
1349
1350                 if (bytenr > last_byte) {
1351                         ret = create_image_file_range(trans, root, objectid,
1352                                                       &btrfs_inode, last_byte,
1353                                                       bytenr, ext2_fs);
1354                         if (ret)
1355                                 goto fail;
1356                 }
1357                 ret = record_file_extent(trans, root, objectid, &btrfs_inode,
1358                                          bytenr, bytenr, num_bytes, 0);
1359                 if (ret)
1360                         goto fail;
1361                 last_byte = bytenr + num_bytes;
1362                 btrfs_release_path(&path);
1363
1364                 if (trans->blocks_used >= 4096) {
1365                         ret = btrfs_commit_transaction(trans, root);
1366                         BUG_ON(ret);
1367                         trans = btrfs_start_transaction(root, 1);
1368                         BUG_ON(!trans);
1369                 }
1370         }
1371         btrfs_release_path(&path);
1372         if (total_bytes > last_byte) {
1373                 ret = create_image_file_range(trans, root, objectid,
1374                                               &btrfs_inode, last_byte,
1375                                               total_bytes, ext2_fs);
1376                 if (ret)
1377                         goto fail;
1378         }
1379
1380         ret = btrfs_insert_inode(trans, root, objectid, &btrfs_inode);
1381         if (ret)
1382                 goto fail;
1383
1384         location.objectid = objectid;
1385         location.offset = 0;
1386         btrfs_set_key_type(&location, BTRFS_INODE_ITEM_KEY);
1387         ret = btrfs_insert_dir_item(trans, root, name, strlen(name),
1388                                     btrfs_root_dirid(&root->root_item),
1389                                     &location, EXT2_FT_REG_FILE, objectid);
1390         if (ret)
1391                 goto fail;
1392         ret = btrfs_insert_inode_ref(trans, root, name, strlen(name),
1393                                      objectid,
1394                                      btrfs_root_dirid(&root->root_item),
1395                                      objectid);
1396         if (ret)
1397                 goto fail;
1398         location.objectid = btrfs_root_dirid(&root->root_item);
1399         location.offset = 0;
1400         btrfs_set_key_type(&location, BTRFS_INODE_ITEM_KEY);
1401         ret = btrfs_lookup_inode(trans, root, &path, &location, 1);
1402         if (ret)
1403                 goto fail;
1404         leaf = path.nodes[0];
1405         inode_item = btrfs_item_ptr(leaf, path.slots[0],
1406                                     struct btrfs_inode_item);
1407         btrfs_set_inode_size(leaf, inode_item, strlen(name) * 2 +
1408                              btrfs_inode_size(leaf, inode_item));
1409         btrfs_mark_buffer_dirty(leaf);
1410         btrfs_release_path(&path);
1411         ret = btrfs_commit_transaction(trans, root);
1412         BUG_ON(ret);
1413 fail:
1414         btrfs_release_path(&path);
1415         return ret;
1416 }
1417
1418 static struct btrfs_root * link_subvol(struct btrfs_root *root,
1419                 const char *base, u64 root_objectid)
1420 {
1421         struct btrfs_trans_handle *trans;
1422         struct btrfs_fs_info *fs_info = root->fs_info;
1423         struct btrfs_root *tree_root = fs_info->tree_root;
1424         struct btrfs_root *new_root = NULL;
1425         struct btrfs_path *path;
1426         struct btrfs_inode_item *inode_item;
1427         struct extent_buffer *leaf;
1428         struct btrfs_key key;
1429         u64 dirid = btrfs_root_dirid(&root->root_item);
1430         u64 index = 2;
1431         char buf[64];
1432         int i;
1433         int ret;
1434
1435         path = btrfs_alloc_path();
1436         BUG_ON(!path);
1437
1438         key.objectid = dirid;
1439         key.type = BTRFS_DIR_INDEX_KEY;
1440         key.offset = (u64)-1;
1441
1442         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1443         BUG_ON(ret <= 0);
1444
1445         if (path->slots[0] > 0) {
1446                 path->slots[0]--;
1447                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1448                 if (key.objectid == dirid && key.type == BTRFS_DIR_INDEX_KEY)
1449                         index = key.offset + 1;
1450         }
1451         btrfs_release_path(path);
1452
1453         trans = btrfs_start_transaction(root, 1);
1454         BUG_ON(!trans);
1455
1456         key.objectid = dirid;
1457         key.offset = 0;
1458         key.type =  BTRFS_INODE_ITEM_KEY;
1459
1460         ret = btrfs_lookup_inode(trans, root, path, &key, 1);
1461         BUG_ON(ret);
1462         leaf = path->nodes[0];
1463         inode_item = btrfs_item_ptr(leaf, path->slots[0],
1464                                     struct btrfs_inode_item);
1465
1466         key.objectid = root_objectid;
1467         key.offset = (u64)-1;
1468         key.type = BTRFS_ROOT_ITEM_KEY;
1469
1470         strcpy(buf, base);
1471         for (i = 0; i < 1024; i++) {
1472                 ret = btrfs_insert_dir_item(trans, root, buf, strlen(buf),
1473                                             dirid, &key, BTRFS_FT_DIR, index);
1474                 if (ret != -EEXIST)
1475                         break;
1476                 sprintf(buf, "%s%d", base, i);
1477         }
1478         if (ret)
1479                 goto fail;
1480
1481         btrfs_set_inode_size(leaf, inode_item, strlen(buf) * 2 +
1482                              btrfs_inode_size(leaf, inode_item));
1483         btrfs_mark_buffer_dirty(leaf);
1484         btrfs_release_path(path);
1485
1486         /* add the backref first */
1487         ret = btrfs_add_root_ref(trans, tree_root, root_objectid,
1488                                  BTRFS_ROOT_BACKREF_KEY,
1489                                  root->root_key.objectid,
1490                                  dirid, index, buf, strlen(buf));
1491         BUG_ON(ret);
1492
1493         /* now add the forward ref */
1494         ret = btrfs_add_root_ref(trans, tree_root, root->root_key.objectid,
1495                                  BTRFS_ROOT_REF_KEY, root_objectid,
1496                                  dirid, index, buf, strlen(buf));
1497
1498         ret = btrfs_commit_transaction(trans, root);
1499         BUG_ON(ret);
1500
1501         new_root = btrfs_read_fs_root(fs_info, &key);
1502         if (IS_ERR(new_root))
1503                 new_root = NULL;
1504 fail:
1505         btrfs_free_path(path);
1506         return new_root;
1507 }
1508
1509 static int create_chunk_mapping(struct btrfs_trans_handle *trans,
1510                                 struct btrfs_root *root)
1511 {
1512         struct btrfs_fs_info *info = root->fs_info;
1513         struct btrfs_root *chunk_root = info->chunk_root;
1514         struct btrfs_root *extent_root = info->extent_root;
1515         struct btrfs_device *device;
1516         struct btrfs_block_group_cache *cache;
1517         struct btrfs_dev_extent *extent;
1518         struct extent_buffer *leaf;
1519         struct btrfs_chunk chunk;
1520         struct btrfs_key key;
1521         struct btrfs_path path;
1522         u64 cur_start;
1523         u64 total_bytes;
1524         u64 chunk_objectid;
1525         int ret;
1526
1527         btrfs_init_path(&path);
1528
1529         total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
1530         chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
1531
1532         BUG_ON(list_empty(&info->fs_devices->devices));
1533         device = list_entry(info->fs_devices->devices.next,
1534                             struct btrfs_device, dev_list);
1535         BUG_ON(device->devid != info->fs_devices->latest_devid);
1536
1537         /* delete device extent created by make_btrfs */
1538         key.objectid = device->devid;
1539         key.offset = 0;
1540         key.type = BTRFS_DEV_EXTENT_KEY;
1541         ret = btrfs_search_slot(trans, device->dev_root, &key, &path, -1, 1);
1542         if (ret < 0)
1543                 goto err;
1544
1545         BUG_ON(ret > 0);
1546         ret = btrfs_del_item(trans, device->dev_root, &path);
1547         if (ret)
1548                 goto err;
1549         btrfs_release_path(&path);
1550
1551         /* delete chunk item created by make_btrfs */
1552         key.objectid = chunk_objectid;
1553         key.offset = 0;
1554         key.type = BTRFS_CHUNK_ITEM_KEY;
1555         ret = btrfs_search_slot(trans, chunk_root, &key, &path, -1, 1);
1556         if (ret < 0)
1557                 goto err;
1558
1559         BUG_ON(ret > 0);
1560         ret = btrfs_del_item(trans, chunk_root, &path);
1561         if (ret)
1562                 goto err;
1563         btrfs_release_path(&path);
1564
1565         /* for each block group, create device extent and chunk item */
1566         cur_start = 0;
1567         while (cur_start < total_bytes) {
1568                 cache = btrfs_lookup_block_group(root->fs_info, cur_start);
1569                 BUG_ON(!cache);
1570
1571                 /* insert device extent */
1572                 key.objectid = device->devid;
1573                 key.offset = cache->key.objectid;
1574                 key.type = BTRFS_DEV_EXTENT_KEY;
1575                 ret = btrfs_insert_empty_item(trans, device->dev_root, &path,
1576                                               &key, sizeof(*extent));
1577                 if (ret)
1578                         goto err;
1579
1580                 leaf = path.nodes[0];
1581                 extent = btrfs_item_ptr(leaf, path.slots[0],
1582                                         struct btrfs_dev_extent);
1583
1584                 btrfs_set_dev_extent_chunk_tree(leaf, extent,
1585                                                 chunk_root->root_key.objectid);
1586                 btrfs_set_dev_extent_chunk_objectid(leaf, extent,
1587                                                     chunk_objectid);
1588                 btrfs_set_dev_extent_chunk_offset(leaf, extent,
1589                                                   cache->key.objectid);
1590                 btrfs_set_dev_extent_length(leaf, extent, cache->key.offset);
1591                 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
1592                     (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
1593                     BTRFS_UUID_SIZE);
1594                 btrfs_mark_buffer_dirty(leaf);
1595                 btrfs_release_path(&path);
1596
1597                 /* insert chunk item */
1598                 btrfs_set_stack_chunk_length(&chunk, cache->key.offset);
1599                 btrfs_set_stack_chunk_owner(&chunk,
1600                                             extent_root->root_key.objectid);
1601                 btrfs_set_stack_chunk_stripe_len(&chunk, STRIPE_LEN);
1602                 btrfs_set_stack_chunk_type(&chunk, cache->flags);
1603                 btrfs_set_stack_chunk_io_align(&chunk, device->io_align);
1604                 btrfs_set_stack_chunk_io_width(&chunk, device->io_width);
1605                 btrfs_set_stack_chunk_sector_size(&chunk, device->sector_size);
1606                 btrfs_set_stack_chunk_num_stripes(&chunk, 1);
1607                 btrfs_set_stack_chunk_sub_stripes(&chunk, 0);
1608                 btrfs_set_stack_stripe_devid(&chunk.stripe, device->devid);
1609                 btrfs_set_stack_stripe_offset(&chunk.stripe,
1610                                               cache->key.objectid);
1611                 memcpy(&chunk.stripe.dev_uuid, device->uuid, BTRFS_UUID_SIZE);
1612
1613                 key.objectid = chunk_objectid;
1614                 key.offset = cache->key.objectid;
1615                 key.type = BTRFS_CHUNK_ITEM_KEY;
1616
1617                 ret = btrfs_insert_item(trans, chunk_root, &key, &chunk,
1618                                         btrfs_chunk_item_size(1));
1619                 if (ret)
1620                         goto err;
1621
1622                 cur_start = cache->key.objectid + cache->key.offset;
1623         }
1624
1625         device->bytes_used = total_bytes;
1626         ret = btrfs_update_device(trans, device);
1627 err:
1628         btrfs_release_path(&path);
1629         return ret;
1630 }
1631
1632 static int create_subvol(struct btrfs_trans_handle *trans,
1633                          struct btrfs_root *root, u64 root_objectid)
1634 {
1635         struct extent_buffer *tmp;
1636         struct btrfs_root *new_root;
1637         struct btrfs_key key;
1638         struct btrfs_root_item root_item;
1639         int ret;
1640
1641         ret = btrfs_copy_root(trans, root, root->node, &tmp,
1642                               root_objectid);
1643         BUG_ON(ret);
1644
1645         memcpy(&root_item, &root->root_item, sizeof(root_item));
1646         btrfs_set_root_bytenr(&root_item, tmp->start);
1647         btrfs_set_root_level(&root_item, btrfs_header_level(tmp));
1648         btrfs_set_root_generation(&root_item, trans->transid);
1649         free_extent_buffer(tmp);
1650
1651         key.objectid = root_objectid;
1652         key.type = BTRFS_ROOT_ITEM_KEY;
1653         key.offset = trans->transid;
1654         ret = btrfs_insert_root(trans, root->fs_info->tree_root,
1655                                 &key, &root_item);
1656
1657         key.offset = (u64)-1;
1658         new_root = btrfs_read_fs_root(root->fs_info, &key);
1659         BUG_ON(!new_root || IS_ERR(new_root));
1660
1661         ret = btrfs_make_root_dir(trans, new_root, BTRFS_FIRST_FREE_OBJECTID);
1662         BUG_ON(ret);
1663
1664         return 0;
1665 }
1666
1667 static int init_btrfs(struct btrfs_root *root)
1668 {
1669         int ret;
1670         struct btrfs_key location;
1671         struct btrfs_trans_handle *trans;
1672         struct btrfs_fs_info *fs_info = root->fs_info;
1673         struct extent_buffer *tmp;
1674
1675         trans = btrfs_start_transaction(root, 1);
1676         BUG_ON(!trans);
1677         ret = btrfs_make_block_groups(trans, root);
1678         if (ret)
1679                 goto err;
1680         ret = btrfs_fix_block_accounting(trans, root);
1681         if (ret)
1682                 goto err;
1683         ret = create_chunk_mapping(trans, root);
1684         if (ret)
1685                 goto err;
1686         ret = btrfs_make_root_dir(trans, fs_info->tree_root,
1687                                   BTRFS_ROOT_TREE_DIR_OBJECTID);
1688         if (ret)
1689                 goto err;
1690         memcpy(&location, &root->root_key, sizeof(location));
1691         location.offset = (u64)-1;
1692         ret = btrfs_insert_dir_item(trans, fs_info->tree_root, "default", 7,
1693                                 btrfs_super_root_dir(fs_info->super_copy),
1694                                 &location, BTRFS_FT_DIR, 0);
1695         if (ret)
1696                 goto err;
1697         ret = btrfs_insert_inode_ref(trans, fs_info->tree_root, "default", 7,
1698                                 location.objectid,
1699                                 btrfs_super_root_dir(fs_info->super_copy), 0);
1700         if (ret)
1701                 goto err;
1702         btrfs_set_root_dirid(&fs_info->fs_root->root_item,
1703                              BTRFS_FIRST_FREE_OBJECTID);
1704
1705         /* subvol for ext2 image file */
1706         ret = create_subvol(trans, root, EXT2_IMAGE_SUBVOL_OBJECTID);
1707         BUG_ON(ret);
1708         /* subvol for data relocation */
1709         ret = create_subvol(trans, root, BTRFS_DATA_RELOC_TREE_OBJECTID);
1710         BUG_ON(ret);
1711
1712         ret = __btrfs_cow_block(trans, fs_info->csum_root,
1713                                 fs_info->csum_root->node, NULL, 0, &tmp, 0, 0);
1714         BUG_ON(ret);
1715         free_extent_buffer(tmp);
1716
1717         ret = btrfs_commit_transaction(trans, root);
1718         BUG_ON(ret);
1719 err:
1720         return ret;
1721 }
1722
1723 /*
1724  * Migrate super block to it's default position and zero 0 ~ 16k
1725  */
1726 static int migrate_super_block(int fd, u64 old_bytenr, u32 sectorsize)
1727 {
1728         int ret;
1729         struct extent_buffer *buf;
1730         struct btrfs_super_block *super;
1731         u32 len;
1732         u32 bytenr;
1733
1734         BUG_ON(sectorsize < sizeof(*super));
1735         buf = malloc(sizeof(*buf) + sectorsize);
1736         if (!buf)
1737                 return -ENOMEM;
1738
1739         buf->len = sectorsize;
1740         ret = pread(fd, buf->data, sectorsize, old_bytenr);
1741         if (ret != sectorsize)
1742                 goto fail;
1743
1744         super = (struct btrfs_super_block *)buf->data;
1745         BUG_ON(btrfs_super_bytenr(super) != old_bytenr);
1746         btrfs_set_super_bytenr(super, BTRFS_SUPER_INFO_OFFSET);
1747
1748         csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
1749         ret = pwrite(fd, buf->data, sectorsize, BTRFS_SUPER_INFO_OFFSET);
1750         if (ret != sectorsize)
1751                 goto fail;
1752
1753         ret = fsync(fd);
1754         if (ret)
1755                 goto fail;
1756
1757         memset(buf->data, 0, sectorsize);
1758         for (bytenr = 0; bytenr < BTRFS_SUPER_INFO_OFFSET; ) {
1759                 len = BTRFS_SUPER_INFO_OFFSET - bytenr;
1760                 if (len > sectorsize)
1761                         len = sectorsize;
1762                 ret = pwrite(fd, buf->data, len, bytenr);
1763                 if (ret != len) {
1764                         fprintf(stderr, "unable to zero fill device\n");
1765                         break;
1766                 }
1767                 bytenr += len;
1768         }
1769         ret = 0;
1770         fsync(fd);
1771 fail:
1772         free(buf);
1773         if (ret > 0)
1774                 ret = -1;
1775         return ret;
1776 }
1777
1778 static int prepare_system_chunk_sb(struct btrfs_super_block *super)
1779 {
1780         struct btrfs_chunk *chunk;
1781         struct btrfs_disk_key *key;
1782         u32 sectorsize = btrfs_super_sectorsize(super);
1783
1784         key = (struct btrfs_disk_key *)(super->sys_chunk_array);
1785         chunk = (struct btrfs_chunk *)(super->sys_chunk_array +
1786                                        sizeof(struct btrfs_disk_key));
1787
1788         btrfs_set_disk_key_objectid(key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
1789         btrfs_set_disk_key_type(key, BTRFS_CHUNK_ITEM_KEY);
1790         btrfs_set_disk_key_offset(key, 0);
1791
1792         btrfs_set_stack_chunk_length(chunk, btrfs_super_total_bytes(super));
1793         btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
1794         btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024);
1795         btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM);
1796         btrfs_set_stack_chunk_io_align(chunk, sectorsize);
1797         btrfs_set_stack_chunk_io_width(chunk, sectorsize);
1798         btrfs_set_stack_chunk_sector_size(chunk, sectorsize);
1799         btrfs_set_stack_chunk_num_stripes(chunk, 1);
1800         btrfs_set_stack_chunk_sub_stripes(chunk, 0);
1801         chunk->stripe.devid = super->dev_item.devid;
1802         btrfs_set_stack_stripe_offset(&chunk->stripe, 0);
1803         memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE);
1804         btrfs_set_super_sys_array_size(super, sizeof(*key) + sizeof(*chunk));
1805         return 0;
1806 }
1807
1808 static int prepare_system_chunk(int fd, u64 sb_bytenr, u32 sectorsize)
1809 {
1810         int ret;
1811         struct extent_buffer *buf;
1812         struct btrfs_super_block *super;
1813
1814         BUG_ON(sectorsize < sizeof(*super));
1815         buf = malloc(sizeof(*buf) + sectorsize);
1816         if (!buf)
1817                 return -ENOMEM;
1818
1819         buf->len = sectorsize;
1820         ret = pread(fd, buf->data, sectorsize, sb_bytenr);
1821         if (ret != sectorsize)
1822                 goto fail;
1823
1824         super = (struct btrfs_super_block *)buf->data;
1825         BUG_ON(btrfs_super_bytenr(super) != sb_bytenr);
1826         BUG_ON(btrfs_super_num_devices(super) != 1);
1827
1828         ret = prepare_system_chunk_sb(super);
1829         if (ret)
1830                 goto fail;
1831
1832         csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
1833         ret = pwrite(fd, buf->data, sectorsize, sb_bytenr);
1834         if (ret != sectorsize)
1835                 goto fail;
1836
1837         ret = 0;
1838 fail:
1839         free(buf);
1840         if (ret > 0)
1841                 ret = -1;
1842         return ret;
1843 }
1844
1845 static int relocate_one_reference(struct btrfs_trans_handle *trans,
1846                                   struct btrfs_root *root,
1847                                   u64 extent_start, u64 extent_size,
1848                                   struct btrfs_key *extent_key,
1849                                   struct extent_io_tree *reloc_tree)
1850 {
1851         struct extent_buffer *leaf;
1852         struct btrfs_file_extent_item *fi;
1853         struct btrfs_key key;
1854         struct btrfs_path path;
1855         struct btrfs_inode_item inode;
1856         struct blk_iterate_data data;
1857         u64 bytenr;
1858         u64 num_bytes;
1859         u64 cur_offset;
1860         u64 new_pos;
1861         u64 nbytes;
1862         u64 sector_end;
1863         u32 sectorsize = root->sectorsize;
1864         unsigned long ptr;
1865         int datacsum;
1866         int fd;
1867         int ret;
1868
1869         btrfs_init_path(&path);
1870         ret = btrfs_search_slot(trans, root, extent_key, &path, -1, 1);
1871         if (ret)
1872                 goto fail;
1873
1874         leaf = path.nodes[0];
1875         fi = btrfs_item_ptr(leaf, path.slots[0],
1876                             struct btrfs_file_extent_item);
1877         BUG_ON(btrfs_file_extent_offset(leaf, fi) > 0);
1878         if (extent_start != btrfs_file_extent_disk_bytenr(leaf, fi) ||
1879             extent_size != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
1880                 ret = 1;
1881                 goto fail;
1882         }
1883
1884         bytenr = extent_start + btrfs_file_extent_offset(leaf, fi);
1885         num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
1886
1887         ret = btrfs_del_item(trans, root, &path);
1888         if (ret)
1889                 goto fail;
1890
1891         ret = btrfs_free_extent(trans, root, extent_start, extent_size, 0,
1892                                 root->root_key.objectid,
1893                                 extent_key->objectid, extent_key->offset);
1894         if (ret)
1895                 goto fail;
1896
1897         btrfs_release_path(&path);
1898
1899         key.objectid = extent_key->objectid;
1900         key.offset = 0;
1901         key.type =  BTRFS_INODE_ITEM_KEY;
1902         ret = btrfs_lookup_inode(trans, root, &path, &key, 0);
1903         if (ret)
1904                 goto fail;
1905
1906         leaf = path.nodes[0];
1907         ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
1908         read_extent_buffer(leaf, &inode, ptr, sizeof(inode));
1909         btrfs_release_path(&path);
1910
1911         BUG_ON(num_bytes & (sectorsize - 1));
1912         nbytes = btrfs_stack_inode_nbytes(&inode) - num_bytes;
1913         btrfs_set_stack_inode_nbytes(&inode, nbytes);
1914         datacsum = !(btrfs_stack_inode_flags(&inode) & BTRFS_INODE_NODATASUM);
1915
1916         data = (struct blk_iterate_data) {
1917                 .trans          = trans,
1918                 .root           = root,
1919                 .inode          = &inode,
1920                 .objectid       = extent_key->objectid,
1921                 .first_block    = extent_key->offset / sectorsize,
1922                 .disk_block     = 0,
1923                 .num_blocks     = 0,
1924                 .boundary       = (u64)-1,
1925                 .checksum       = datacsum,
1926                 .errcode        = 0,
1927         };
1928
1929         cur_offset = extent_key->offset;
1930         while (num_bytes > 0) {
1931                 sector_end = bytenr + sectorsize - 1;
1932                 if (test_range_bit(reloc_tree, bytenr, sector_end,
1933                                    EXTENT_LOCKED, 1)) {
1934                         ret = get_state_private(reloc_tree, bytenr, &new_pos);
1935                         BUG_ON(ret);
1936                 } else {
1937                         ret = custom_alloc_extent(root, sectorsize, 0, &key);
1938                         if (ret)
1939                                 goto fail;
1940                         new_pos = key.objectid;
1941
1942                         if (cur_offset == extent_key->offset) {
1943                                 fd = root->fs_info->fs_devices->latest_bdev;
1944                                 readahead(fd, bytenr, num_bytes);
1945                         }
1946                         ret = copy_disk_extent(root, new_pos, bytenr,
1947                                                sectorsize);
1948                         if (ret)
1949                                 goto fail;
1950                         ret = set_extent_bits(reloc_tree, bytenr, sector_end,
1951                                               EXTENT_LOCKED, GFP_NOFS);
1952                         BUG_ON(ret);
1953                         ret = set_state_private(reloc_tree, bytenr, new_pos);
1954                         BUG_ON(ret);
1955                 }
1956
1957                 ret = block_iterate_proc(NULL, new_pos / sectorsize,
1958                                          cur_offset / sectorsize, &data);
1959                 if (ret & BLOCK_ABORT) {
1960                         ret = data.errcode;
1961                         goto fail;
1962                 }
1963
1964                 cur_offset += sectorsize;
1965                 bytenr += sectorsize;
1966                 num_bytes -= sectorsize;
1967         }
1968
1969         if (data.num_blocks > 0) {
1970                 ret = record_file_blocks(trans, root,
1971                                          extent_key->objectid, &inode,
1972                                          data.first_block, data.disk_block,
1973                                          data.num_blocks, datacsum);
1974                 if (ret)
1975                         goto fail;
1976         }
1977
1978         key.objectid = extent_key->objectid;
1979         key.offset = 0;
1980         key.type =  BTRFS_INODE_ITEM_KEY;
1981         ret = btrfs_lookup_inode(trans, root, &path, &key, 1);
1982         if (ret)
1983                 goto fail;
1984
1985         leaf = path.nodes[0];
1986         ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
1987         write_extent_buffer(leaf, &inode, ptr, sizeof(inode));
1988         btrfs_mark_buffer_dirty(leaf);
1989         btrfs_release_path(&path);
1990
1991 fail:
1992         btrfs_release_path(&path);
1993         return ret;
1994 }
1995
1996 static int relocate_extents_range(struct btrfs_root *fs_root,
1997                                   struct btrfs_root *ext2_root,
1998                                   u64 start_byte, u64 end_byte)
1999 {
2000         struct btrfs_fs_info *info = fs_root->fs_info;
2001         struct btrfs_root *extent_root = info->extent_root;
2002         struct btrfs_root *cur_root = NULL;
2003         struct btrfs_trans_handle *trans;
2004         struct btrfs_extent_data_ref *dref;
2005         struct btrfs_extent_inline_ref *iref;
2006         struct btrfs_extent_item *ei;
2007         struct extent_buffer *leaf;
2008         struct btrfs_key key;
2009         struct btrfs_key extent_key;
2010         struct btrfs_path path;
2011         struct extent_io_tree reloc_tree;
2012         unsigned long ptr;
2013         unsigned long end;
2014         u64 cur_byte;
2015         u64 num_bytes;
2016         u64 ref_root;
2017         u64 num_extents;
2018         int pass = 0;
2019         int ret;
2020
2021         btrfs_init_path(&path);
2022         extent_io_tree_init(&reloc_tree);
2023
2024         key.objectid = start_byte;
2025         key.offset = 0;
2026         key.type = BTRFS_EXTENT_ITEM_KEY;
2027         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2028         if (ret < 0)
2029                 goto fail;
2030         if (ret > 0) {
2031                 ret = btrfs_previous_item(extent_root, &path, 0,
2032                                           BTRFS_EXTENT_ITEM_KEY);
2033                 if (ret < 0)
2034                         goto fail;
2035                 if (ret == 0) {
2036                         leaf = path.nodes[0];
2037                         btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2038                         if (key.objectid + key.offset > start_byte)
2039                                 start_byte = key.objectid;
2040                 }
2041         }
2042         btrfs_release_path(&path);
2043 again:
2044         cur_root = (pass % 2 == 0) ? ext2_root : fs_root;
2045         num_extents = 0;
2046
2047         trans = btrfs_start_transaction(cur_root, 1);
2048         BUG_ON(!trans);
2049
2050         cur_byte = start_byte;
2051         while (1) {
2052                 key.objectid = cur_byte;
2053                 key.offset = 0;
2054                 key.type = BTRFS_EXTENT_ITEM_KEY;
2055                 ret = btrfs_search_slot(trans, extent_root,
2056                                         &key, &path, 0, 0);
2057                 if (ret < 0)
2058                         goto fail;
2059 next:
2060                 leaf = path.nodes[0];
2061                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2062                         ret = btrfs_next_leaf(extent_root, &path);
2063                         if (ret < 0)
2064                                 goto fail;
2065                         if (ret > 0)
2066                                 break;
2067                         leaf = path.nodes[0];
2068                 }
2069
2070                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2071                 if (key.objectid < cur_byte ||
2072                     key.type != BTRFS_EXTENT_ITEM_KEY) {
2073                         path.slots[0]++;
2074                         goto next;
2075                 }
2076                 if (key.objectid >= end_byte)
2077                         break;
2078
2079                 num_extents++;
2080
2081                 cur_byte = key.objectid;
2082                 num_bytes = key.offset;
2083                 ei = btrfs_item_ptr(leaf, path.slots[0],
2084                                     struct btrfs_extent_item);
2085                 BUG_ON(!(btrfs_extent_flags(leaf, ei) &
2086                          BTRFS_EXTENT_FLAG_DATA));
2087
2088                 ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
2089                 end = ptr + btrfs_item_size_nr(leaf, path.slots[0]);
2090
2091                 ptr += sizeof(struct btrfs_extent_item);
2092
2093                 while (ptr < end) {
2094                         iref = (struct btrfs_extent_inline_ref *)ptr;
2095                         key.type = btrfs_extent_inline_ref_type(leaf, iref);
2096                         BUG_ON(key.type != BTRFS_EXTENT_DATA_REF_KEY);
2097                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
2098                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
2099                         extent_key.objectid =
2100                                 btrfs_extent_data_ref_objectid(leaf, dref);
2101                         extent_key.offset =
2102                                 btrfs_extent_data_ref_offset(leaf, dref);
2103                         extent_key.type = BTRFS_EXTENT_DATA_KEY;
2104                         BUG_ON(btrfs_extent_data_ref_count(leaf, dref) != 1);
2105
2106                         if (ref_root == cur_root->root_key.objectid)
2107                                 break;
2108
2109                         ptr += btrfs_extent_inline_ref_size(key.type);
2110                 }
2111
2112                 if (ptr >= end) {
2113                         path.slots[0]++;
2114                         goto next;
2115                 }
2116
2117                 ret = relocate_one_reference(trans, cur_root, cur_byte,
2118                                              num_bytes, &extent_key,
2119                                              &reloc_tree);
2120                 if (ret < 0)
2121                         goto fail;
2122
2123                 cur_byte += num_bytes;
2124                 btrfs_release_path(&path);
2125
2126                 if (trans->blocks_used >= 4096) {
2127                         ret = btrfs_commit_transaction(trans, cur_root);
2128                         BUG_ON(ret);
2129                         trans = btrfs_start_transaction(cur_root, 1);
2130                         BUG_ON(!trans);
2131                 }
2132         }
2133         btrfs_release_path(&path);
2134
2135         ret = btrfs_commit_transaction(trans, cur_root);
2136         BUG_ON(ret);
2137
2138         if (num_extents > 0 && pass++ < 16)
2139                 goto again;
2140
2141         ret = (num_extents > 0) ? -1 : 0;
2142 fail:
2143         btrfs_release_path(&path);
2144         extent_io_tree_cleanup(&reloc_tree);
2145         return ret;
2146 }
2147
2148 /*
2149  * relocate data in system chunk
2150  */
2151 static int cleanup_sys_chunk(struct btrfs_root *fs_root,
2152                              struct btrfs_root *ext2_root)
2153 {
2154         struct btrfs_block_group_cache *cache;
2155         int i, ret = 0;
2156         u64 offset = 0;
2157         u64 end_byte;
2158
2159         while(1) {
2160                 cache = btrfs_lookup_block_group(fs_root->fs_info, offset);
2161                 if (!cache)
2162                         break;
2163
2164                 end_byte = cache->key.objectid + cache->key.offset;
2165                 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
2166                         ret = relocate_extents_range(fs_root, ext2_root,
2167                                                      cache->key.objectid,
2168                                                      end_byte);
2169                         if (ret)
2170                                 goto fail;
2171                 }
2172                 offset = end_byte;
2173         }
2174         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2175                 offset = btrfs_sb_offset(i);
2176                 offset &= ~((u64)STRIPE_LEN - 1);
2177
2178                 ret = relocate_extents_range(fs_root, ext2_root,
2179                                              offset, offset + STRIPE_LEN);
2180                 if (ret)
2181                         goto fail;
2182         }
2183         ret = 0;
2184 fail:
2185         return ret;
2186 }
2187
2188 static int fixup_chunk_mapping(struct btrfs_root *root)
2189 {
2190         struct btrfs_trans_handle *trans;
2191         struct btrfs_fs_info *info = root->fs_info;
2192         struct btrfs_root *chunk_root = info->chunk_root;
2193         struct extent_buffer *leaf;
2194         struct btrfs_key key;
2195         struct btrfs_path path;
2196         struct btrfs_chunk chunk;
2197         unsigned long ptr;
2198         u32 size;
2199         u64 type;
2200         int ret;
2201
2202         btrfs_init_path(&path);
2203
2204         trans = btrfs_start_transaction(root, 1);
2205         BUG_ON(!trans);
2206
2207         /*
2208          * recow the whole chunk tree. this will move all chunk tree blocks
2209          * into system block group.
2210          */
2211         memset(&key, 0, sizeof(key));
2212         while (1) {
2213                 ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 1);
2214                 if (ret < 0)
2215                         goto err;
2216
2217                 ret = btrfs_next_leaf(chunk_root, &path);
2218                 if (ret < 0)
2219                         goto err;
2220                 if (ret > 0)
2221                         break;
2222
2223                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
2224                 btrfs_release_path(&path);
2225         }
2226         btrfs_release_path(&path);
2227
2228         /* fixup the system chunk array in super block */
2229         btrfs_set_super_sys_array_size(info->super_copy, 0);
2230
2231         key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
2232         key.offset = 0;
2233         key.type = BTRFS_CHUNK_ITEM_KEY;
2234
2235         ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 0);
2236         if (ret < 0)
2237                 goto err;
2238         BUG_ON(ret != 0);
2239         while(1) {
2240                 leaf = path.nodes[0];
2241                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2242                         ret = btrfs_next_leaf(chunk_root, &path);
2243                         if (ret < 0)
2244                                 goto err;
2245                         if (ret > 0)
2246                                 break;
2247                         leaf = path.nodes[0];
2248                 }
2249                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2250                 if (key.type != BTRFS_CHUNK_ITEM_KEY)
2251                         goto next;
2252
2253                 ptr = btrfs_item_ptr_offset(leaf, path.slots[0]);
2254                 size = btrfs_item_size_nr(leaf, path.slots[0]);
2255                 BUG_ON(size != sizeof(chunk));
2256                 read_extent_buffer(leaf, &chunk, ptr, size);
2257                 type = btrfs_stack_chunk_type(&chunk);
2258
2259                 if (!(type & BTRFS_BLOCK_GROUP_SYSTEM))
2260                         goto next;
2261
2262                 ret = btrfs_add_system_chunk(trans, chunk_root, &key,
2263                                              &chunk, size);
2264                 if (ret)
2265                         goto err;
2266 next:
2267                 path.slots[0]++;
2268         }
2269
2270         ret = btrfs_commit_transaction(trans, root);
2271         BUG_ON(ret);
2272 err:
2273         btrfs_release_path(&path);
2274         return ret;
2275 }
2276
2277 static int do_convert(const char *devname, int datacsum, int packing,
2278                 int noxattr)
2279 {
2280         int i, ret;
2281         int fd = -1;
2282         u32 blocksize;
2283         u64 blocks[7];
2284         u64 total_bytes;
2285         u64 super_bytenr;
2286         ext2_filsys ext2_fs;
2287         struct btrfs_root *root;
2288         struct btrfs_root *ext2_root;
2289
2290         ret = open_ext2fs(devname, &ext2_fs);
2291         if (ret) {
2292                 fprintf(stderr, "unable to open the Ext2fs\n");
2293                 goto fail;
2294         }
2295         blocksize = ext2_fs->blocksize;
2296         total_bytes = (u64)ext2_fs->super->s_blocks_count * blocksize;
2297         if (blocksize < 4096) {
2298                 fprintf(stderr, "block size is too small\n");
2299                 goto fail;
2300         }
2301         if (!(ext2_fs->super->s_feature_incompat &
2302               EXT2_FEATURE_INCOMPAT_FILETYPE)) {
2303                 fprintf(stderr, "filetype feature is missing\n");
2304                 goto fail;
2305         }
2306         for (i = 0; i < 7; i++) {
2307                 ret = ext2_alloc_block(ext2_fs, 0, blocks + i);
2308                 if (ret) {
2309                         fprintf(stderr, "not enough free space\n");
2310                         goto fail;
2311                 }
2312                 blocks[i] *= blocksize;
2313         }
2314         super_bytenr = blocks[0];
2315         fd = open(devname, O_RDWR);
2316         if (fd < 0) {
2317                 fprintf(stderr, "unable to open %s\n", devname);
2318                 goto fail;
2319         }
2320         ret = make_btrfs(fd, devname, ext2_fs->super->s_volume_name,
2321                          blocks, total_bytes, blocksize, blocksize,
2322                          blocksize, blocksize, 0);
2323         if (ret) {
2324                 fprintf(stderr, "unable to create initial ctree: %s\n",
2325                         strerror(-ret));
2326                 goto fail;
2327         }
2328         /* create a system chunk that maps the whole device */
2329         ret = prepare_system_chunk(fd, super_bytenr, blocksize);
2330         if (ret) {
2331                 fprintf(stderr, "unable to update system chunk\n");
2332                 goto fail;
2333         }
2334         root = open_ctree_fd(fd, devname, super_bytenr, O_RDWR);
2335         if (!root) {
2336                 fprintf(stderr, "unable to open ctree\n");
2337                 goto fail;
2338         }
2339         ret = cache_free_extents(root, ext2_fs);
2340         if (ret) {
2341                 fprintf(stderr, "error during cache_free_extents %d\n", ret);
2342                 goto fail;
2343         }
2344         root->fs_info->extent_ops = &extent_ops;
2345         /* recover block allocation bitmap */
2346         for (i = 0; i < 7; i++) {
2347                 blocks[i] /= blocksize;
2348                 ext2_free_block(ext2_fs, blocks[i]);
2349         }
2350         ret = init_btrfs(root);
2351         if (ret) {
2352                 fprintf(stderr, "unable to setup the root tree\n");
2353                 goto fail;
2354         }
2355         printf("creating btrfs metadata.\n");
2356         ret = copy_inodes(root, ext2_fs, datacsum, packing, noxattr);
2357         if (ret) {
2358                 fprintf(stderr, "error during copy_inodes %d\n", ret);
2359                 goto fail;
2360         }
2361         printf("creating ext2fs image file.\n");
2362         ext2_root = link_subvol(root, "ext2_saved", EXT2_IMAGE_SUBVOL_OBJECTID);
2363         if (!ext2_root) {
2364                 fprintf(stderr, "unable to create subvol\n");
2365                 goto fail;
2366         }
2367         ret = create_ext2_image(ext2_root, ext2_fs, "image");
2368         if (ret) {
2369                 fprintf(stderr, "error during create_ext2_image %d\n", ret);
2370                 goto fail;
2371         }
2372         printf("cleaning up system chunk.\n");
2373         ret = cleanup_sys_chunk(root, ext2_root);
2374         if (ret) {
2375                 fprintf(stderr, "error during cleanup_sys_chunk %d\n", ret);
2376                 goto fail;
2377         }
2378         ret = close_ctree(root);
2379         if (ret) {
2380                 fprintf(stderr, "error during close_ctree %d\n", ret);
2381                 goto fail;
2382         }
2383         close_ext2fs(ext2_fs);
2384
2385         /*
2386          * If this step succeed, we get a mountable btrfs. Otherwise
2387          * the ext2fs is left unchanged.
2388          */
2389         ret = migrate_super_block(fd, super_bytenr, blocksize);
2390         if (ret) {
2391                 fprintf(stderr, "unable to migrate super block\n");
2392                 goto fail;
2393         }
2394
2395         root = open_ctree_fd(fd, devname, 0, O_RDWR);
2396         if (!root) {
2397                 fprintf(stderr, "unable to open ctree\n");
2398                 goto fail;
2399         }
2400         /* move chunk tree into system chunk. */
2401         ret = fixup_chunk_mapping(root);
2402         if (ret) {
2403                 fprintf(stderr, "error during fixup_chunk_tree\n");
2404                 goto fail;
2405         }
2406         ret = close_ctree(root);
2407         close(fd);
2408
2409         printf("conversion complete.\n");
2410         return 0;
2411 fail:
2412         if (fd != -1)
2413                 close(fd);
2414         fprintf(stderr, "conversion aborted.\n");
2415         return -1;
2416 }
2417
2418 static int may_rollback(struct btrfs_root *root)
2419 {
2420         struct btrfs_fs_info *info = root->fs_info;
2421         struct btrfs_multi_bio *multi = NULL;
2422         u64 bytenr;
2423         u64 length;
2424         u64 physical;
2425         u64 total_bytes;
2426         int num_stripes;
2427         int ret;
2428
2429         if (btrfs_super_num_devices(info->super_copy) != 1)
2430                 goto fail;
2431
2432         bytenr = BTRFS_SUPER_INFO_OFFSET;
2433         total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
2434
2435         while (1) {
2436                 ret = btrfs_map_block(&info->mapping_tree, WRITE, bytenr,
2437                                       &length, &multi, 0, NULL);
2438                 if (ret)
2439                         goto fail;
2440
2441                 num_stripes = multi->num_stripes;
2442                 physical = multi->stripes[0].physical;
2443                 kfree(multi);
2444
2445                 if (num_stripes != 1 || physical != bytenr)
2446                         goto fail;
2447
2448                 bytenr += length;
2449                 if (bytenr >= total_bytes)
2450                         break;
2451         }
2452         return 0;
2453 fail:
2454         return -1;
2455 }
2456
2457 static int do_rollback(const char *devname, int force)
2458 {
2459         int fd = -1;
2460         int ret;
2461         int i;
2462         struct btrfs_root *root;
2463         struct btrfs_root *ext2_root;
2464         struct btrfs_root *chunk_root;
2465         struct btrfs_dir_item *dir;
2466         struct btrfs_inode_item *inode;
2467         struct btrfs_file_extent_item *fi;
2468         struct btrfs_trans_handle *trans;
2469         struct extent_buffer *leaf;
2470         struct btrfs_block_group_cache *cache1;
2471         struct btrfs_block_group_cache *cache2;
2472         struct btrfs_key key;
2473         struct btrfs_path path;
2474         struct extent_io_tree io_tree;
2475         char *buf = NULL;
2476         char *name;
2477         u64 bytenr;
2478         u64 num_bytes;
2479         u64 root_dir;
2480         u64 objectid;
2481         u64 offset;
2482         u64 start;
2483         u64 end;
2484         u64 sb_bytenr;
2485         u64 first_free;
2486         u64 total_bytes;
2487         u32 sectorsize;
2488
2489         extent_io_tree_init(&io_tree);
2490
2491         fd = open(devname, O_RDWR);
2492         if (fd < 0) {
2493                 fprintf(stderr, "unable to open %s\n", devname);
2494                 goto fail;
2495         }
2496         root = open_ctree_fd(fd, devname, 0, O_RDWR);
2497         if (!root) {
2498                 fprintf(stderr, "unable to open ctree\n");
2499                 goto fail;
2500         }
2501         ret = may_rollback(root);
2502         if (ret < 0) {
2503                 fprintf(stderr, "unable to do rollback\n");
2504                 goto fail;
2505         }
2506
2507         sectorsize = root->sectorsize;
2508         buf = malloc(sectorsize);
2509         if (!buf) {
2510                 fprintf(stderr, "unable to allocate memory\n");
2511                 goto fail;
2512         }
2513
2514         btrfs_init_path(&path);
2515
2516         key.objectid = EXT2_IMAGE_SUBVOL_OBJECTID;
2517         key.type = BTRFS_ROOT_ITEM_KEY;
2518         key.offset = (u64)-1;
2519         ext2_root = btrfs_read_fs_root(root->fs_info, &key);
2520         if (!ext2_root || IS_ERR(ext2_root)) {
2521                 fprintf(stderr, "unable to open subvol %llu\n",
2522                         key.objectid);
2523                 goto fail;
2524         }
2525
2526         name = "image";
2527         root_dir = btrfs_root_dirid(&root->root_item);
2528         dir = btrfs_lookup_dir_item(NULL, ext2_root, &path,
2529                                    root_dir, name, strlen(name), 0);
2530         if (!dir || IS_ERR(dir)) {
2531                 fprintf(stderr, "unable to find file %s\n", name);
2532                 goto fail;
2533         }
2534         leaf = path.nodes[0];
2535         btrfs_dir_item_key_to_cpu(leaf, dir, &key);
2536         btrfs_release_path(&path);
2537
2538         objectid = key.objectid;
2539
2540         ret = btrfs_lookup_inode(NULL, ext2_root, &path, &key, 0);
2541         if (ret) {
2542                 fprintf(stderr, "unable to find inode item\n");
2543                 goto fail;
2544         }
2545         leaf = path.nodes[0];
2546         inode = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_inode_item);
2547         total_bytes = btrfs_inode_size(leaf, inode);
2548         btrfs_release_path(&path);
2549
2550         key.objectid = objectid;
2551         key.offset = 0;
2552         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
2553         ret = btrfs_search_slot(NULL, ext2_root, &key, &path, 0, 0);
2554         if (ret != 0) {
2555                 fprintf(stderr, "unable to find first file extent\n");
2556                 btrfs_release_path(&path);
2557                 goto fail;
2558         }
2559
2560         /* build mapping tree for the relocated blocks */
2561         for (offset = 0; offset < total_bytes; ) {
2562                 leaf = path.nodes[0];
2563                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2564                         ret = btrfs_next_leaf(root, &path);
2565                         if (ret != 0)
2566                                 break;  
2567                         continue;
2568                 }
2569
2570                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2571                 if (key.objectid != objectid || key.offset != offset ||
2572                     btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2573                         break;
2574
2575                 fi = btrfs_item_ptr(leaf, path.slots[0],
2576                                     struct btrfs_file_extent_item);
2577                 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
2578                         break;
2579                 if (btrfs_file_extent_compression(leaf, fi) ||
2580                     btrfs_file_extent_encryption(leaf, fi) ||
2581                     btrfs_file_extent_other_encoding(leaf, fi))
2582                         break;
2583
2584                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2585                 /* skip holes and direct mapped extents */
2586                 if (bytenr == 0 || bytenr == offset)
2587                         goto next_extent;
2588
2589                 bytenr += btrfs_file_extent_offset(leaf, fi);
2590                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
2591
2592                 cache1 = btrfs_lookup_block_group(root->fs_info, offset);
2593                 cache2 =  btrfs_lookup_block_group(root->fs_info,
2594                                                    offset + num_bytes - 1);
2595                 if (!cache1 || cache1 != cache2 ||
2596                     (!(cache1->flags & BTRFS_BLOCK_GROUP_SYSTEM) &&
2597                      !intersect_with_sb(offset, num_bytes)))
2598                         break;
2599
2600                 set_extent_bits(&io_tree, offset, offset + num_bytes - 1,
2601                                 EXTENT_LOCKED, GFP_NOFS);
2602                 set_state_private(&io_tree, offset, bytenr);
2603 next_extent:
2604                 offset += btrfs_file_extent_num_bytes(leaf, fi);
2605                 path.slots[0]++;
2606         }
2607         btrfs_release_path(&path);
2608
2609         if (offset < total_bytes) {
2610                 fprintf(stderr, "unable to build extent mapping\n");
2611                 goto fail;
2612         }
2613
2614         first_free = BTRFS_SUPER_INFO_OFFSET + 2 * sectorsize - 1;
2615         first_free &= ~((u64)sectorsize - 1);
2616         /* backup for extent #0 should exist */
2617         if(!test_range_bit(&io_tree, 0, first_free - 1, EXTENT_LOCKED, 1)) {
2618                 fprintf(stderr, "no backup for the first extent\n");
2619                 goto fail;
2620         }
2621         /* force no allocation from system block group */
2622         root->fs_info->system_allocs = -1;
2623         trans = btrfs_start_transaction(root, 1);
2624         BUG_ON(!trans);
2625         /*
2626          * recow the whole chunk tree, this will remove all chunk tree blocks
2627          * from system block group
2628          */
2629         chunk_root = root->fs_info->chunk_root;
2630         memset(&key, 0, sizeof(key));
2631         while (1) {
2632                 ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 1);
2633                 if (ret < 0)
2634                         break;
2635
2636                 ret = btrfs_next_leaf(chunk_root, &path);
2637                 if (ret)
2638                         break;
2639
2640                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
2641                 btrfs_release_path(&path);
2642         }
2643         btrfs_release_path(&path);
2644
2645         offset = 0;
2646         num_bytes = 0;
2647         while(1) {
2648                 cache1 = btrfs_lookup_block_group(root->fs_info, offset);
2649                 if (!cache1)
2650                         break;
2651
2652                 if (cache1->flags & BTRFS_BLOCK_GROUP_SYSTEM)
2653                         num_bytes += btrfs_block_group_used(&cache1->item);
2654
2655                 offset = cache1->key.objectid + cache1->key.offset;
2656         }
2657         /* only extent #0 left in system block group? */
2658         if (num_bytes > first_free) {
2659                 fprintf(stderr, "unable to empty system block group\n");
2660                 goto fail;
2661         }
2662         /* create a system chunk that maps the whole device */
2663         ret = prepare_system_chunk_sb(root->fs_info->super_copy);
2664         if (ret) {
2665                 fprintf(stderr, "unable to update system chunk\n");
2666                 goto fail;
2667         }
2668
2669         ret = btrfs_commit_transaction(trans, root);
2670         BUG_ON(ret);
2671
2672         ret = close_ctree(root);
2673         if (ret) {
2674                 fprintf(stderr, "error during close_ctree %d\n", ret);
2675                 goto fail;
2676         }
2677
2678         /* zero btrfs super block mirrors */
2679         memset(buf, 0, sectorsize);
2680         for (i = 1 ; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2681                 bytenr = btrfs_sb_offset(i);
2682                 if (bytenr >= total_bytes)
2683                         break;
2684                 ret = pwrite(fd, buf, sectorsize, bytenr);
2685         }
2686
2687         sb_bytenr = (u64)-1;
2688         /* copy all relocated blocks back */
2689         while(1) {
2690                 ret = find_first_extent_bit(&io_tree, 0, &start, &end,
2691                                             EXTENT_LOCKED);
2692                 if (ret)
2693                         break;
2694
2695                 ret = get_state_private(&io_tree, start, &bytenr);
2696                 BUG_ON(ret);
2697
2698                 clear_extent_bits(&io_tree, start, end, EXTENT_LOCKED,
2699                                   GFP_NOFS);
2700
2701                 while (start <= end) {
2702                         if (start == BTRFS_SUPER_INFO_OFFSET) {
2703                                 sb_bytenr = bytenr;
2704                                 goto next_sector;
2705                         }
2706                         ret = pread(fd, buf, sectorsize, bytenr);
2707                         if (ret < 0) {
2708                                 fprintf(stderr, "error during pread %d\n", ret);
2709                                 goto fail;
2710                         }
2711                         BUG_ON(ret != sectorsize);
2712                         ret = pwrite(fd, buf, sectorsize, start);
2713                         if (ret < 0) {
2714                                 fprintf(stderr, "error during pwrite %d\n", ret);
2715                                 goto fail;
2716                         }
2717                         BUG_ON(ret != sectorsize);
2718 next_sector:
2719                         start += sectorsize;
2720                         bytenr += sectorsize;
2721                 }
2722         }
2723
2724         ret = fsync(fd);
2725         if (ret) {
2726                 fprintf(stderr, "error during fsync %d\n", ret);
2727                 goto fail;
2728         }
2729         /*
2730          * finally, overwrite btrfs super block.
2731          */
2732         ret = pread(fd, buf, sectorsize, sb_bytenr);
2733         if (ret < 0) {
2734                 fprintf(stderr, "error during pread %d\n", ret);
2735                 goto fail;
2736         }
2737         BUG_ON(ret != sectorsize);
2738         ret = pwrite(fd, buf, sectorsize, BTRFS_SUPER_INFO_OFFSET);
2739         if (ret < 0) {
2740                 fprintf(stderr, "error during pwrite %d\n", ret);
2741                 goto fail;
2742         }
2743         BUG_ON(ret != sectorsize);
2744         ret = fsync(fd);
2745         if (ret) {
2746                 fprintf(stderr, "error during fsync %d\n", ret);
2747                 goto fail;
2748         }
2749
2750         close(fd);
2751         free(buf);
2752         extent_io_tree_cleanup(&io_tree);
2753         printf("rollback complete.\n");
2754         return 0;
2755
2756 fail:
2757         if (fd != -1)
2758                 close(fd);
2759         free(buf);
2760         fprintf(stderr, "rollback aborted.\n");
2761         return -1;
2762 }
2763
/*
 * Write the command line synopsis and a one-line description of each
 * option to standard output.
 */
static void print_usage(void)
{
        static const char *const usage_lines[] = {
                "usage: btrfs-convert [-d] [-i] [-n] [-r] device\n",
                "\t-d disable data checksum\n",
                "\t-i ignore xattrs and ACLs\n",
                "\t-n disable packing of small files\n",
                "\t-r roll back to ext2fs\n",
        };
        size_t idx;

        for (idx = 0; idx < sizeof(usage_lines) / sizeof(usage_lines[0]); idx++)
                fputs(usage_lines[idx], stdout);
}
2772
2773 int main(int argc, char *argv[])
2774 {
2775         int ret;
2776         int packing = 1;
2777         int noxattr = 0;
2778         int datacsum = 1;
2779         int rollback = 0;
2780         char *file;
2781         while(1) {
2782                 int c = getopt(argc, argv, "dinr");
2783                 if (c < 0)
2784                         break;
2785                 switch(c) {
2786                         case 'd':
2787                                 datacsum = 0;
2788                                 break;
2789                         case 'i':
2790                                 noxattr = 1;
2791                                 break;
2792                         case 'n':
2793                                 packing = 0;
2794                                 break;
2795                         case 'r':
2796                                 rollback = 1;
2797                                 break;
2798                         default:
2799                                 print_usage();
2800                                 return 1;
2801                 }
2802         }
2803         argc = argc - optind;
2804         if (argc != 1) {
2805                 print_usage();
2806                 return 1;
2807         }
2808
2809         file = argv[optind];
2810         if (check_mounted(file)) {
2811                 fprintf(stderr, "%s is mounted\n", file);
2812                 return 1;
2813         }
2814
2815         if (rollback) {
2816                 ret = do_rollback(file, 0);
2817         } else {
2818                 ret = do_convert(file, datacsum, packing, noxattr);
2819         }
2820         if (ret)
2821                 return 1;
2822         return 0;
2823 }