btrfs-progs: check: introduce traversal function for fsck
[platform/upstream/btrfs-progs.git] / utils.c
diff --git a/utils.c b/utils.c
index eeff910..79bf946 100644 (file)
--- a/utils.c
+++ b/utils.c
@@ -182,9 +182,831 @@ int test_uuid_unique(char *fs_uuid)
 }
 
 /*
+ * Reserve space from free_tree.
+ * The algorithm is very simple, find the first cache_extent with enough space
+ * and allocate from its beginning.
+ */
+static int reserve_free_space(struct cache_tree *free_tree, u64 len,
+                             u64 *ret_start)
+{
+       struct cache_extent *cache;
+       int found = 0;
+
+       BUG_ON(!ret_start);
+       cache = first_cache_extent(free_tree);
+       while (cache) {
+               if (cache->size > len) {
+                       found = 1;
+                       *ret_start = cache->start;
+
+                       cache->size -= len;
+                       if (cache->size == 0) {
+                               remove_cache_extent(free_tree, cache);
+                               free(cache);
+                       } else {
+                               cache->start += len;
+                       }
+                       break;
+               }
+               cache = next_cache_extent(cache);
+       }
+       if (!found)
+               return -ENOSPC;
+       return 0;
+}
+
+static inline int write_temp_super(int fd, struct btrfs_super_block *sb,
+                                  u64 sb_bytenr)
+{
+       char *body = (char *)sb + BTRFS_CSUM_SIZE;
+       u32 csum = ~(u32)0;
+       int nr;
+
+       /* Checksum everything after the csum field, then store it in sb */
+       csum = btrfs_csum_data(NULL, body, csum,
+                              BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
+       btrfs_csum_final(csum, (char *)&sb->csum[0]);
+       nr = pwrite(fd, sb, BTRFS_SUPER_INFO_SIZE, sb_bytenr);
+       if (nr >= BTRFS_SUPER_INFO_SIZE)
+               return 0;
+       return nr < 0 ? -errno : -EIO;  /* a short write maps to -EIO */
+}
+
+/*
+ * Setup temporary superblock at cfg->super_bytenr
+ * Needed info are extracted from cfg, and root_bytenr, chunk_bytenr
+ * Returns 0 on success, -EINVAL on a bad fs UUID, or a write error code.
+ *
+ * For now sys chunk array will be empty and dev_item is empty too.
+ * They will be re-initialized at temp chunk tree setup.
+ */
+static int setup_temp_super(int fd, struct btrfs_mkfs_config *cfg,
+                           u64 root_bytenr, u64 chunk_bytenr)
+{
+       unsigned char chunk_uuid[BTRFS_UUID_SIZE];
+       char super_buf[BTRFS_SUPER_INFO_SIZE];
+       struct btrfs_super_block *super = (struct btrfs_super_block *)super_buf;
+       int ret;
+
+       /*
+        * We rely on cfg->chunk_uuid and cfg->fs_uuid to pass uuid
+        * for other functions.
+        * Caller must allocate space for them
+        */
+       BUG_ON(!cfg->chunk_uuid || !cfg->fs_uuid);
+       memset(super_buf, 0, BTRFS_SUPER_INFO_SIZE);
+       cfg->num_bytes = round_down(cfg->num_bytes, cfg->sectorsize);
+
+       if (cfg->fs_uuid && *cfg->fs_uuid) {    /* caller supplied an fsid */
+               if (uuid_parse(cfg->fs_uuid, super->fsid) != 0) {
+                       error("could not parse UUID: %s", cfg->fs_uuid);
+                       ret = -EINVAL;
+                       goto out;
+               }
+               if (!test_uuid_unique(cfg->fs_uuid)) {
+                       error("non-unique UUID: %s", cfg->fs_uuid);
+                       ret = -EINVAL;
+                       goto out;
+               }
+       } else {        /* empty string: generate one and hand it back in cfg */
+               uuid_generate(super->fsid);
+               uuid_unparse(super->fsid, cfg->fs_uuid);
+       }
+       uuid_generate(chunk_uuid);
+       uuid_unparse(chunk_uuid, cfg->chunk_uuid);
+
+       btrfs_set_super_bytenr(super, cfg->super_bytenr);
+       btrfs_set_super_num_devices(super, 1);
+       btrfs_set_super_magic(super, BTRFS_MAGIC);
+       btrfs_set_super_generation(super, 1);
+       btrfs_set_super_root(super, root_bytenr);
+       btrfs_set_super_chunk_root(super, chunk_bytenr);
+       btrfs_set_super_total_bytes(super, cfg->num_bytes);
+       /*
+        * Temporary filesystem will only have 6 tree roots:
+        * chunk tree, root tree, extent_tree, device tree, fs tree
+        * and csum tree.
+        */
+       btrfs_set_super_bytes_used(super, 6 * cfg->nodesize);
+       btrfs_set_super_sectorsize(super, cfg->sectorsize);
+       btrfs_set_super_leafsize(super, cfg->nodesize);
+       btrfs_set_super_nodesize(super, cfg->nodesize);
+       btrfs_set_super_stripesize(super, cfg->stripesize);
+       btrfs_set_super_csum_type(super, BTRFS_CSUM_TYPE_CRC32);
+       btrfs_set_super_cache_generation(super, -1);
+       btrfs_set_super_incompat_flags(super, cfg->features);
+       if (cfg->label)
+               __strncpy_null(super->label, cfg->label, BTRFS_LABEL_SIZE - 1);
+
+       /* Sys chunk array will be re-initialized at chunk tree init time */
+       super->sys_chunk_array_size = 0;
+
+       ret = write_temp_super(fd, super, cfg->super_bytenr);
+out:
+       return ret;
+}
+
+/*
+ * Zero @buf and fill in a generation-1 tree block header for @bytenr/@owner.
+ */
+static int setup_temp_extent_buffer(struct extent_buffer *buf,
+                                   struct btrfs_mkfs_config *cfg,
+                                   u64 bytenr, u64 owner)
+{
+       unsigned char fsid[BTRFS_FSID_SIZE];
+       unsigned char chunk_uuid[BTRFS_UUID_SIZE];
+       int ret;
+
+       /* fsid and chunk uuid are parsed from the strings kept in cfg */
+       BUG_ON(!cfg->fs_uuid || !cfg->chunk_uuid);
+       ret = uuid_parse(cfg->fs_uuid, fsid);
+       if (ret)
+               return -EINVAL; /* uuid_parse() rejected cfg->fs_uuid */
+       ret = uuid_parse(cfg->chunk_uuid, chunk_uuid);
+       if (ret)
+               return -EINVAL; /* uuid_parse() rejected cfg->chunk_uuid */
+
+       memset(buf->data, 0, cfg->nodesize);    /* start from an all-zero block */
+       buf->len = cfg->nodesize;
+       btrfs_set_header_bytenr(buf, bytenr);
+       btrfs_set_header_generation(buf, 1);
+       btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
+       btrfs_set_header_owner(buf, owner);
+       btrfs_set_header_flags(buf, BTRFS_HEADER_FLAG_WRITTEN);
+       write_extent_buffer(buf, chunk_uuid, btrfs_header_chunk_tree_uuid(buf),
+                           BTRFS_UUID_SIZE);
+       write_extent_buffer(buf, fsid, btrfs_header_fsid(), BTRFS_FSID_SIZE);
+       return 0;       /* nritems stays 0 until the caller inserts items */
+}
+
+static inline int write_temp_extent_buffer(int fd, struct extent_buffer *buf,
+                                          u64 bytenr)
+{
+       int ret;        /* 0 on success, negative errno on failure */
+
+       csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0);
+
+       /* Temporary extent buffer is always mapped 1:1 on disk */
+       ret = pwrite(fd, buf->data, buf->len, bytenr);
+       if (ret < 0)
+               return -errno;  /* test the sign first: buf->len may be unsigned */
+       if (ret < buf->len)
+               return -EIO;    /* short write */
+       return 0;
+}
+
+/*
+ * Append a root item for tree @objectid, rooted at @bytenr, to leaf @buf.
+ * Advances *slot and moves *itemoff down by the item size.
+ * Used by setup_temp_root_tree().
+ */
+static void insert_temp_root_item(struct extent_buffer *buf,
+                                 struct btrfs_mkfs_config *cfg,
+                                 int *slot, u32 *itemoff, u64 objectid,
+                                 u64 bytenr)
+{
+       struct btrfs_root_item root_item;
+       struct btrfs_inode_item *inode_item;
+       struct btrfs_disk_key disk_key;
+
+       btrfs_set_header_nritems(buf, *slot + 1);
+       (*itemoff) -= sizeof(root_item);        /* item data grows downwards */
+       memset(&root_item, 0, sizeof(root_item));
+       inode_item = &root_item.inode;
+       btrfs_set_stack_inode_generation(inode_item, 1);
+       btrfs_set_stack_inode_size(inode_item, 3);
+       btrfs_set_stack_inode_nlink(inode_item, 1);
+       btrfs_set_stack_inode_nbytes(inode_item, cfg->nodesize);
+       btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);
+       btrfs_set_root_refs(&root_item, 1);
+       btrfs_set_root_used(&root_item, cfg->nodesize);
+       btrfs_set_root_generation(&root_item, 1);
+       btrfs_set_root_bytenr(&root_item, bytenr);
+
+       memset(&disk_key, 0, sizeof(disk_key));
+       btrfs_set_disk_key_type(&disk_key, BTRFS_ROOT_ITEM_KEY);
+       btrfs_set_disk_key_objectid(&disk_key, objectid);
+       btrfs_set_disk_key_offset(&disk_key, 0);
+
+       btrfs_set_item_key(buf, &disk_key, *slot);
+       btrfs_set_item_offset(buf, btrfs_item_nr(*slot), *itemoff);
+       btrfs_set_item_size(buf, btrfs_item_nr(*slot), sizeof(root_item));
+       write_extent_buffer(buf, &root_item,
+                           btrfs_item_ptr_offset(buf, *slot),
+                           sizeof(root_item));
+       (*slot)++;      /* next insertion goes into the following slot */
+}
+
+static int setup_temp_root_tree(int fd, struct btrfs_mkfs_config *cfg,
+                               u64 root_bytenr, u64 extent_bytenr,
+                               u64 dev_bytenr, u64 fs_bytenr, u64 csum_bytenr)
+{
+       struct extent_buffer *buf = NULL;
+       u32 itemoff = __BTRFS_LEAF_DATA_SIZE(cfg->nodesize);
+       int slot = 0;
+       int ret;
+
+       /*
+        * Builds the temporary root tree leaf.  Provided bytenrs must be in
+        * ascending order, or the tree root will have a bad key order.
+        */
+       BUG_ON(!(root_bytenr < extent_bytenr && extent_bytenr < dev_bytenr &&
+                dev_bytenr < fs_bytenr && fs_bytenr < csum_bytenr));
+       buf = malloc(sizeof(*buf) + cfg->nodesize);     /* struct + block data */
+       if (!buf)
+               return -ENOMEM;
+
+       ret = setup_temp_extent_buffer(buf, cfg, root_bytenr,
+                                      BTRFS_ROOT_TREE_OBJECTID);
+       if (ret < 0)
+               goto out;
+       /* One root item each for the extent, dev, fs and csum trees */
+       insert_temp_root_item(buf, cfg, &slot, &itemoff,
+                             BTRFS_EXTENT_TREE_OBJECTID, extent_bytenr);
+       insert_temp_root_item(buf, cfg, &slot, &itemoff,
+                             BTRFS_DEV_TREE_OBJECTID, dev_bytenr);
+       insert_temp_root_item(buf, cfg, &slot, &itemoff,
+                             BTRFS_FS_TREE_OBJECTID, fs_bytenr);
+       insert_temp_root_item(buf, cfg, &slot, &itemoff,
+                             BTRFS_CSUM_TREE_OBJECTID, csum_bytenr);
+
+       ret = write_temp_extent_buffer(fd, buf, root_bytenr);
+out:
+       free(buf);      /* the leaf only lives until it is written to disk */
+       return ret;
+}
+
+static int insert_temp_dev_item(int fd, struct extent_buffer *buf,
+                               struct btrfs_mkfs_config *cfg,
+                               int *slot, u32 *itemoff)
+{
+       struct btrfs_disk_key disk_key;
+       struct btrfs_dev_item *dev_item;
+       char super_buf[BTRFS_SUPER_INFO_SIZE];
+       unsigned char dev_uuid[BTRFS_UUID_SIZE];
+       unsigned char fsid[BTRFS_FSID_SIZE];
+       struct btrfs_super_block *super = (struct btrfs_super_block *)super_buf;
+       int ret;
+       /* Add the DEV_ITEM for devid 1 to @buf and mirror it into the super */
+       ret = pread(fd, super_buf, BTRFS_SUPER_INFO_SIZE, cfg->super_bytenr);
+       if (ret < BTRFS_SUPER_INFO_SIZE) {
+               ret = (ret < 0 ? -errno : -EIO);        /* short read is -EIO */
+               goto out;
+       }
+
+       btrfs_set_header_nritems(buf, *slot + 1);
+       (*itemoff) -= sizeof(*dev_item);
+       /* setup device item 1, 0 is for replace case */
+       btrfs_set_disk_key_type(&disk_key, BTRFS_DEV_ITEM_KEY);
+       btrfs_set_disk_key_objectid(&disk_key, BTRFS_DEV_ITEMS_OBJECTID);
+       btrfs_set_disk_key_offset(&disk_key, 1);
+       btrfs_set_item_key(buf, &disk_key, *slot);
+       btrfs_set_item_offset(buf, btrfs_item_nr(*slot), *itemoff);
+       btrfs_set_item_size(buf, btrfs_item_nr(*slot), sizeof(*dev_item));
+
+       dev_item = btrfs_item_ptr(buf, *slot, struct btrfs_dev_item);
+       /* Generate device uuid */
+       uuid_generate(dev_uuid);
+       write_extent_buffer(buf, dev_uuid,
+                       (unsigned long)btrfs_device_uuid(dev_item),
+                       BTRFS_UUID_SIZE);
+       uuid_parse(cfg->fs_uuid, fsid); /* return value is not checked here */
+       write_extent_buffer(buf, fsid,
+                       (unsigned long)btrfs_device_fsid(dev_item),
+                       BTRFS_FSID_SIZE);
+       btrfs_set_device_id(buf, dev_item, 1);
+       btrfs_set_device_generation(buf, dev_item, 0);
+       btrfs_set_device_total_bytes(buf, dev_item, cfg->num_bytes);
+       /*
+        * The number must match the initial SYSTEM and META chunk size
+        */
+       btrfs_set_device_bytes_used(buf, dev_item,
+                       BTRFS_MKFS_SYSTEM_GROUP_SIZE +
+                       BTRFS_CONVERT_META_GROUP_SIZE);
+       btrfs_set_device_io_align(buf, dev_item, cfg->sectorsize);
+       btrfs_set_device_io_width(buf, dev_item, cfg->sectorsize);
+       btrfs_set_device_sector_size(buf, dev_item, cfg->sectorsize);
+       btrfs_set_device_type(buf, dev_item, 0);
+
+       /* Super dev_item is not complete, copy the complete one to sb */
+       read_extent_buffer(buf, &super->dev_item, (unsigned long)dev_item,
+                          sizeof(*dev_item));
+       ret = write_temp_super(fd, super, cfg->super_bytenr);
+       (*slot)++;      /* advanced even when the super write failed */
+out:
+       return ret;
+}
+
+static int insert_temp_chunk_item(int fd, struct extent_buffer *buf,
+                                 struct btrfs_mkfs_config *cfg,
+                                 int *slot, u32 *itemoff, u64 start, u64 len,
+                                 u64 type)
+{
+       struct btrfs_chunk *chunk;
+       struct btrfs_disk_key disk_key;
+       char super_buf[BTRFS_SUPER_INFO_SIZE];
+       struct btrfs_super_block *sb = (struct btrfs_super_block *)super_buf;
+       int ret = 0;
+
+       /* Append a 1-stripe CHUNK_ITEM; the dev uuid comes from the temp super */
+       ret = pread(fd, super_buf, BTRFS_SUPER_INFO_SIZE,
+                   cfg->super_bytenr);
+       if (ret < BTRFS_SUPER_INFO_SIZE)
+               return (ret < 0 ? -errno : -EIO);
+       ret = 0;        /* do not leak the pread() byte count to the caller */
+
+       btrfs_set_header_nritems(buf, *slot + 1);
+       (*itemoff) -= btrfs_chunk_item_size(1);
+       btrfs_set_disk_key_type(&disk_key, BTRFS_CHUNK_ITEM_KEY);
+       btrfs_set_disk_key_objectid(&disk_key, BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+       btrfs_set_disk_key_offset(&disk_key, start);
+       btrfs_set_item_key(buf, &disk_key, *slot);
+       btrfs_set_item_offset(buf, btrfs_item_nr(*slot), *itemoff);
+       btrfs_set_item_size(buf, btrfs_item_nr(*slot),
+                           btrfs_chunk_item_size(1));
+
+       chunk = btrfs_item_ptr(buf, *slot, struct btrfs_chunk);
+       btrfs_set_chunk_length(buf, chunk, len);
+       btrfs_set_chunk_owner(buf, chunk, BTRFS_EXTENT_TREE_OBJECTID);
+       btrfs_set_chunk_stripe_len(buf, chunk, 64 * 1024);
+       btrfs_set_chunk_type(buf, chunk, type);
+       btrfs_set_chunk_io_align(buf, chunk, cfg->sectorsize);
+       btrfs_set_chunk_io_width(buf, chunk, cfg->sectorsize);
+       btrfs_set_chunk_sector_size(buf, chunk, cfg->sectorsize);
+       btrfs_set_chunk_num_stripes(buf, chunk, 1);
+       /* TODO: Support DUP profile for system chunk */
+       btrfs_set_stripe_devid_nr(buf, chunk, 0, 1);
+       /* We are doing 1:1 mapping, so start is its dev offset */
+       btrfs_set_stripe_offset_nr(buf, chunk, 0, start);
+       write_extent_buffer(buf, &sb->dev_item.uuid,
+                           (unsigned long)btrfs_stripe_dev_uuid_nr(chunk, 0),
+                           BTRFS_UUID_SIZE);
+       (*slot)++;
+
+       /*
+        * If it's system chunk, also append key + chunk to the super block's
+        * sys_chunk_array and write the super back.
+        */
+       if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
+               char *cur;
+
+               cur = (char *)sb->sys_chunk_array + sb->sys_chunk_array_size;
+               memcpy(cur, &disk_key, sizeof(disk_key));
+               cur += sizeof(disk_key);
+               read_extent_buffer(buf, cur, (unsigned long int)chunk,
+                                  btrfs_chunk_item_size(1));
+               sb->sys_chunk_array_size += btrfs_chunk_item_size(1) +
+                                           sizeof(disk_key);
+
+               ret = write_temp_super(fd, sb, cfg->super_bytenr);
+       }
+       return ret;     /* 0, or a negative errno from the super read/write */
+}
+
+static int setup_temp_chunk_tree(int fd, struct btrfs_mkfs_config *cfg,
+                                u64 sys_chunk_start, u64 meta_chunk_start,
+                                u64 chunk_bytenr)
+{
+       struct extent_buffer *buf = NULL;
+       u32 itemoff = __BTRFS_LEAF_DATA_SIZE(cfg->nodesize);
+       int slot = 0;
+       int ret;
+
+       /* Chunk tree leaf builder; SYS chunk must not start after META chunk */
+       BUG_ON(meta_chunk_start < sys_chunk_start);
+       buf = malloc(sizeof(*buf) + cfg->nodesize);
+       if (!buf)
+               return -ENOMEM;
+       ret = setup_temp_extent_buffer(buf, cfg, chunk_bytenr,
+                                      BTRFS_CHUNK_TREE_OBJECTID);
+       if (ret < 0)
+               goto out;
+       /* Leaf layout: dev item, then the SYSTEM and METADATA chunk items */
+       ret = insert_temp_dev_item(fd, buf, cfg, &slot, &itemoff);
+       if (ret < 0)
+               goto out;
+       ret = insert_temp_chunk_item(fd, buf, cfg, &slot, &itemoff,
+                                    sys_chunk_start,
+                                    BTRFS_MKFS_SYSTEM_GROUP_SIZE,
+                                    BTRFS_BLOCK_GROUP_SYSTEM);
+       if (ret < 0)
+               goto out;
+       ret = insert_temp_chunk_item(fd, buf, cfg, &slot, &itemoff,
+                                    meta_chunk_start,
+                                    BTRFS_CONVERT_META_GROUP_SIZE,
+                                    BTRFS_BLOCK_GROUP_METADATA);
+       if (ret < 0)
+               goto out;
+       ret = write_temp_extent_buffer(fd, buf, chunk_bytenr);
+
+out:
+       free(buf);      /* reached on success and on every error path */
+       return ret;
+}
+
+static void insert_temp_dev_extent(struct extent_buffer *buf,
+                                  int *slot, u32 *itemoff, u64 start, u64 len)
+{
+       struct btrfs_dev_extent *dev_extent;
+       struct btrfs_disk_key disk_key;
+       /* Append a DEV_EXTENT item covering @len bytes at @start to @buf */
+       btrfs_set_header_nritems(buf, *slot + 1);
+       (*itemoff) -= sizeof(*dev_extent);
+       btrfs_set_disk_key_type(&disk_key, BTRFS_DEV_EXTENT_KEY);
+       btrfs_set_disk_key_objectid(&disk_key, 1);
+       btrfs_set_disk_key_offset(&disk_key, start);
+       btrfs_set_item_key(buf, &disk_key, *slot);
+       btrfs_set_item_offset(buf, btrfs_item_nr(*slot), *itemoff);
+       btrfs_set_item_size(buf, btrfs_item_nr(*slot), sizeof(*dev_extent));
+
+       dev_extent = btrfs_item_ptr(buf, *slot, struct btrfs_dev_extent);
+       btrfs_set_dev_extent_chunk_objectid(buf, dev_extent,
+                                           BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+       btrfs_set_dev_extent_length(buf, dev_extent, len);
+       /* the same @start is used as key offset and as chunk offset */
+       btrfs_set_dev_extent_chunk_offset(buf, dev_extent, start);
+       btrfs_set_dev_extent_chunk_tree(buf, dev_extent,
+                                       BTRFS_CHUNK_TREE_OBJECTID);
+       (*slot)++;
+}
+
+static int setup_temp_dev_tree(int fd, struct btrfs_mkfs_config *cfg,
+                              u64 sys_chunk_start, u64 meta_chunk_start,
+                              u64 dev_bytenr)
+{
+       u32 item_off = __BTRFS_LEAF_DATA_SIZE(cfg->nodesize);
+       struct extent_buffer *leaf;
+       int nr = 0;
+       int err;
+
+       /* The SYS chunk must not start after the META chunk */
+       BUG_ON(meta_chunk_start < sys_chunk_start);
+       leaf = malloc(sizeof(*leaf) + cfg->nodesize);
+       if (!leaf)
+               return -ENOMEM;
+       err = setup_temp_extent_buffer(leaf, cfg, dev_bytenr,
+                                      BTRFS_DEV_TREE_OBJECTID);
+       if (err >= 0) {
+               /* One dev extent per temporary chunk: SYSTEM, then METADATA */
+               insert_temp_dev_extent(leaf, &nr, &item_off, sys_chunk_start,
+                                      BTRFS_MKFS_SYSTEM_GROUP_SIZE);
+               insert_temp_dev_extent(leaf, &nr, &item_off, meta_chunk_start,
+                                      BTRFS_CONVERT_META_GROUP_SIZE);
+               err = write_temp_extent_buffer(fd, leaf, dev_bytenr);
+       }
+       free(leaf);
+       return err;
+}
+
+static int setup_temp_fs_tree(int fd, struct btrfs_mkfs_config *cfg,
+                             u64 fs_bytenr)
+{
+       struct extent_buffer *leaf;
+       int err;
+
+       leaf = malloc(sizeof(*leaf) + cfg->nodesize);
+       if (!leaf)
+               return -ENOMEM;
+
+       /*
+        * The temporary fs tree is one leaf with a header and no items,
+        * so it is written out right after the header is set up.
+        */
+       err = setup_temp_extent_buffer(leaf, cfg, fs_bytenr,
+                                      BTRFS_FS_TREE_OBJECTID);
+       if (err >= 0)
+               err = write_temp_extent_buffer(fd, leaf, fs_bytenr);
+       free(leaf);
+       return err;
+}
+
+static int setup_temp_csum_tree(int fd, struct btrfs_mkfs_config *cfg,
+                               u64 csum_bytenr)
+{
+       struct extent_buffer *leaf;
+       int err;
+
+       leaf = malloc(sizeof(*leaf) + cfg->nodesize);
+       if (!leaf)
+               return -ENOMEM;
+
+       /*
+        * The temporary csum tree is one leaf with a header and no items,
+        * so it is written out right after the header is set up.
+        */
+       err = setup_temp_extent_buffer(leaf, cfg, csum_bytenr,
+                                      BTRFS_CSUM_TREE_OBJECTID);
+       if (err >= 0)
+               err = write_temp_extent_buffer(fd, leaf, csum_bytenr);
+       free(leaf);
+       return err;
+}
+
+/*
+ * Insert one temporary extent item (tree block, one inline ref to @ref_root).
+ *
+ * NOTE: without skinny-metadata the first key of the tree block at @bytenr
+ * is read back from disk, so this must be called after all other trees are
+ * written.  Returns 0 on success or a negative errno.
+ */
+static int insert_temp_extent_item(int fd, struct extent_buffer *buf,
+                                  struct btrfs_mkfs_config *cfg,
+                                  int *slot, u32 *itemoff, u64 bytenr,
+                                  u64 ref_root)
+{
+       struct extent_buffer *tmp;
+       struct btrfs_extent_item *ei;
+       struct btrfs_extent_inline_ref *iref;
+       struct btrfs_disk_key disk_key;
+       struct btrfs_disk_key tree_info_key;
+       struct btrfs_tree_block_info *info;
+       int itemsize;
+       int skinny_metadata = cfg->features &
+                             BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA;
+       int ret;
+
+       if (skinny_metadata)
+               itemsize = sizeof(*ei) + sizeof(*iref);
+       else
+               itemsize = sizeof(*ei) + sizeof(*iref) +
+                          sizeof(struct btrfs_tree_block_info);
+
+       btrfs_set_header_nritems(buf, *slot + 1);
+       *(itemoff) -= itemsize;
+
+       if (skinny_metadata) {
+               btrfs_set_disk_key_type(&disk_key, BTRFS_METADATA_ITEM_KEY);
+               btrfs_set_disk_key_offset(&disk_key, 0);
+       } else {
+               btrfs_set_disk_key_type(&disk_key, BTRFS_EXTENT_ITEM_KEY);
+               btrfs_set_disk_key_offset(&disk_key, cfg->nodesize);
+       }
+       btrfs_set_disk_key_objectid(&disk_key, bytenr);
+
+       btrfs_set_item_key(buf, &disk_key, *slot);
+       btrfs_set_item_offset(buf, btrfs_item_nr(*slot), *itemoff);
+       btrfs_set_item_size(buf, btrfs_item_nr(*slot), itemsize);
+
+       ei = btrfs_item_ptr(buf, *slot, struct btrfs_extent_item);
+       btrfs_set_extent_refs(buf, ei, 1);
+       btrfs_set_extent_generation(buf, ei, 1);
+       btrfs_set_extent_flags(buf, ei, BTRFS_EXTENT_FLAG_TREE_BLOCK);
+
+       if (skinny_metadata) {
+               iref = (struct btrfs_extent_inline_ref *)(ei + 1);
+       } else {        /* the tree block info sits between ei and the ref */
+               info = (struct btrfs_tree_block_info *)(ei + 1);
+               iref = (struct btrfs_extent_inline_ref *)(info + 1);
+       }
+       btrfs_set_extent_inline_ref_type(buf, iref,
+                                        BTRFS_TREE_BLOCK_REF_KEY);
+       btrfs_set_extent_inline_ref_offset(buf, iref, ref_root);
+
+       (*slot)++;
+       if (skinny_metadata)
+               return 0;
+
+       /*
+        * Lastly, fill the tree block key by reading the tree block back.
+        * Convert uses 1:1 mapping, so bytenr can be read directly from disk.
+        */
+       tmp = malloc(sizeof(*tmp) + cfg->nodesize);
+       if (!tmp)
+               return -ENOMEM;
+       ret = setup_temp_extent_buffer(tmp, cfg, bytenr, ref_root);
+       if (ret < 0)
+               goto out;
+       ret = pread(fd, tmp->data, cfg->nodesize, bytenr);
+       if (ret < 0 || ret < cfg->nodesize) {   /* sign first: nodesize may be unsigned */
+               ret = (ret < 0 ? -errno : -EIO);
+               goto out;
+       }
+       if (btrfs_header_nritems(tmp) == 0) {   /* empty block: all-zero key */
+               btrfs_set_disk_key_type(&tree_info_key, 0);
+               btrfs_set_disk_key_objectid(&tree_info_key, 0);
+               btrfs_set_disk_key_offset(&tree_info_key, 0);
+       } else {
+               btrfs_item_key(tmp, &tree_info_key, 0);
+       }
+       btrfs_set_tree_block_key(buf, info, &tree_info_key);
+       ret = 0;        /* do not return the pread() byte count on success */
+
+out:
+       free(tmp);
+       return ret;
+}
+
+static void insert_temp_block_group(struct extent_buffer *buf,
+                                  struct btrfs_mkfs_config *cfg,
+                                  int *slot, u32 *itemoff,
+                                  u64 bytenr, u64 len, u64 used, u64 flag)
+{
+       struct btrfs_block_group_item bgi;
+       struct btrfs_disk_key disk_key;
+       /* Append a BLOCK_GROUP_ITEM keyed (bytenr, len) to @buf; cfg is unused */
+       btrfs_set_header_nritems(buf, *slot + 1);
+       (*itemoff) -= sizeof(bgi);
+       btrfs_set_disk_key_type(&disk_key, BTRFS_BLOCK_GROUP_ITEM_KEY);
+       btrfs_set_disk_key_objectid(&disk_key, bytenr);
+       btrfs_set_disk_key_offset(&disk_key, len);
+       btrfs_set_item_key(buf, &disk_key, *slot);
+       btrfs_set_item_offset(buf, btrfs_item_nr(*slot), *itemoff);
+       btrfs_set_item_size(buf, btrfs_item_nr(*slot), sizeof(bgi));
+
+       btrfs_set_block_group_flags(&bgi, flag);
+       btrfs_set_block_group_used(&bgi, used);
+       btrfs_set_block_group_chunk_objectid(&bgi,
+                       BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+       write_extent_buffer(buf, &bgi, btrfs_item_ptr_offset(buf, *slot),
+                           sizeof(bgi));       /* copy the stack item into the leaf */
+       (*slot)++;
+}
+
+static int setup_temp_extent_tree(int fd, struct btrfs_mkfs_config *cfg,
+                                 u64 chunk_bytenr, u64 root_bytenr,
+                                 u64 extent_bytenr, u64 dev_bytenr,
+                                 u64 fs_bytenr, u64 csum_bytenr)
+{
+       struct extent_buffer *buf = NULL;
+       u32 itemoff = __BTRFS_LEAF_DATA_SIZE(cfg->nodesize);
+       int slot = 0;
+       int ret;
+
+       /*
+        * Builds the temporary extent tree leaf.  The provided bytenrs must be
+        * in ascending order, or extent tree key order will be broken.
+        */
+       BUG_ON(!(chunk_bytenr < root_bytenr && root_bytenr < extent_bytenr &&
+                extent_bytenr < dev_bytenr && dev_bytenr < fs_bytenr &&
+                fs_bytenr < csum_bytenr));
+       buf = malloc(sizeof(*buf) + cfg->nodesize);
+       if (!buf)
+               return -ENOMEM;
+
+       ret = setup_temp_extent_buffer(buf, cfg, extent_bytenr,
+                                      BTRFS_EXTENT_TREE_OBJECTID);
+       if (ret < 0)
+               goto out;
+
+       ret = insert_temp_extent_item(fd, buf, cfg, &slot, &itemoff,
+                       chunk_bytenr, BTRFS_CHUNK_TREE_OBJECTID);
+       if (ret < 0)
+               goto out;
+       /* SYSTEM block group: one tree block (the chunk tree) is in use */
+       insert_temp_block_group(buf, cfg, &slot, &itemoff, chunk_bytenr,
+                       BTRFS_MKFS_SYSTEM_GROUP_SIZE, cfg->nodesize,
+                       BTRFS_BLOCK_GROUP_SYSTEM);
+
+       ret = insert_temp_extent_item(fd, buf, cfg, &slot, &itemoff,
+                       root_bytenr, BTRFS_ROOT_TREE_OBJECTID);
+       if (ret < 0)
+               goto out;
+
+       /* 5 tree blocks used: root, extent, dev, fs and csum */
+       insert_temp_block_group(buf, cfg, &slot, &itemoff, root_bytenr,
+                       BTRFS_CONVERT_META_GROUP_SIZE, cfg->nodesize * 5,
+                       BTRFS_BLOCK_GROUP_METADATA);
+       /* NOTE(review): non-skinny path reads extent_bytenr before it is written */
+       ret = insert_temp_extent_item(fd, buf, cfg, &slot, &itemoff,
+                       extent_bytenr, BTRFS_EXTENT_TREE_OBJECTID);
+       if (ret < 0)
+               goto out;
+       ret = insert_temp_extent_item(fd, buf, cfg, &slot, &itemoff,
+                       dev_bytenr, BTRFS_DEV_TREE_OBJECTID);
+       if (ret < 0)
+               goto out;
+       ret = insert_temp_extent_item(fd, buf, cfg, &slot, &itemoff,
+                       fs_bytenr, BTRFS_FS_TREE_OBJECTID);
+       if (ret < 0)
+               goto out;
+       ret = insert_temp_extent_item(fd, buf, cfg, &slot, &itemoff,
+                       csum_bytenr, BTRFS_CSUM_TREE_OBJECTID);
+       if (ret < 0)
+               goto out;
+
+       ret = write_temp_extent_buffer(fd, buf, extent_bytenr);
+out:
+       free(buf);
+       return ret;
+}
+
+/*
+ * Improved version of make_btrfs(), used when a convert context is given.
+ *
+ * This one will
+ * 1) Do chunk allocation to avoid used data
+ *    And after this function, extent type matches chunk type
+ * 2) Use better structured code
+ *    No super long hand-written code to initialize all tree blocks;
+ *    it is split into small helpers that are reused.
+ *    TODO: Reuse tree operation facilities by introducing new flags
+ */
+static int make_convert_btrfs(int fd, struct btrfs_mkfs_config *cfg,
+                             struct btrfs_convert_context *cctx)
+{
+       struct cache_tree *free = &cctx->free;  /* shadows libc free() here */
+       struct cache_tree *used = &cctx->used;
+       u64 sys_chunk_start;
+       u64 meta_chunk_start;
+       /* chunk tree bytenr, in system chunk */
+       u64 chunk_bytenr;
+       /* metadata trees bytenr, in metadata chunk */
+       u64 root_bytenr;
+       u64 extent_bytenr;
+       u64 dev_bytenr;
+       u64 fs_bytenr;
+       u64 csum_bytenr;
+       int ret;
+
+       /* Shouldn't happen */
+       BUG_ON(cache_tree_empty(used));
+
+       /*
+        * Reserve space for the temporary superblock first.
+        * A full BTRFS_STRIPE_LEN is reserved, more than the superblock
+        * needs, so that the free space left behind stays STRIPE_LEN aligned.
+        */
+       ret = reserve_free_space(free, BTRFS_STRIPE_LEN,
+                                &cfg->super_bytenr);
+       if (ret < 0)
+               goto out;
+
+       /*
+        * Then reserve system chunk space
+        * TODO: Change system group size depending on cctx->total_bytes.
+        * If using current 4M, it can only handle less than one TB for
+        * worst case and then run out of sys space.
+        */
+       ret = reserve_free_space(free, BTRFS_MKFS_SYSTEM_GROUP_SIZE,
+                                &sys_chunk_start);
+       if (ret < 0)
+               goto out;
+       ret = reserve_free_space(free, BTRFS_CONVERT_META_GROUP_SIZE,
+                                &meta_chunk_start);
+       if (ret < 0)
+               goto out;
+
+       /*
+        * Allocated meta/sys chunks will be mapped 1:1 with device offset.
+        *
+        * Inside the allocated metadata chunk, the layout will be:
+        *  | offset            | contents      |
+        *  -------------------------------------
+        *  | +0                | tree root     |
+        *  | +nodesize         | extent root   |
+        *  | +nodesize * 2     | device root   |
+        *  | +nodesize * 3     | fs tree       |
+        *  | +nodesize * 4     | csum tree     |
+        *  -------------------------------------
+        * Inside the allocated system chunk, the layout will be:
+        *  | offset            | contents      |
+        *  -------------------------------------
+        *  | +0                | chunk root    |
+        *  -------------------------------------
+        */
+       chunk_bytenr = sys_chunk_start;
+       root_bytenr = meta_chunk_start;
+       extent_bytenr = meta_chunk_start + cfg->nodesize;
+       dev_bytenr = meta_chunk_start + cfg->nodesize * 2;
+       fs_bytenr = meta_chunk_start + cfg->nodesize * 3;
+       csum_bytenr = meta_chunk_start + cfg->nodesize * 4;
+
+       ret = setup_temp_super(fd, cfg, root_bytenr, chunk_bytenr);
+       if (ret < 0)
+               goto out;
+       /* The super must exist first: the chunk tree setup reads it back */
+       ret = setup_temp_root_tree(fd, cfg, root_bytenr, extent_bytenr,
+                                  dev_bytenr, fs_bytenr, csum_bytenr);
+       if (ret < 0)
+               goto out;
+       ret = setup_temp_chunk_tree(fd, cfg, sys_chunk_start, meta_chunk_start,
+                                   chunk_bytenr);
+       if (ret < 0)
+               goto out;
+       ret = setup_temp_dev_tree(fd, cfg, sys_chunk_start, meta_chunk_start,
+                                 dev_bytenr);
+       if (ret < 0)
+               goto out;
+       ret = setup_temp_fs_tree(fd, cfg, fs_bytenr);
+       if (ret < 0)
+               goto out;
+       ret = setup_temp_csum_tree(fd, cfg, csum_bytenr);
+       if (ret < 0)
+               goto out;
+       /*
+        * Setup extent tree last, since it may need to read tree block key
+        * for non-skinny metadata case.
+        */
+       ret = setup_temp_extent_tree(fd, cfg, chunk_bytenr, root_bytenr,
+                                    extent_bytenr, dev_bytenr, fs_bytenr,
+                                    csum_bytenr);
+out:
+       return ret;     /* nothing to release here: helpers free their buffers */
+}
+
+/*
  * @fs_uuid - if NULL, generates a UUID, returns back the new filesystem UUID
  */
-int make_btrfs(int fd, struct btrfs_mkfs_config *cfg)
+int make_btrfs(int fd, struct btrfs_mkfs_config *cfg,
+               struct btrfs_convert_context *cctx)
 {
        struct btrfs_super_block super;
        struct extent_buffer *buf;
@@ -209,6 +1031,8 @@ int make_btrfs(int fd, struct btrfs_mkfs_config *cfg)
                                 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA);
        u64 num_bytes;
 
+       if (cctx)
+               return make_convert_btrfs(fd, cfg, cctx);
        buf = malloc(sizeof(*buf) + max(cfg->sectorsize, cfg->nodesize));
        if (!buf)
                return -ENOMEM;
@@ -858,8 +1682,8 @@ out:
        return ret;
 }
 
-int btrfs_prepare_device(int fd, const char *file, int zero_end,
-               u64 *block_count_ret, u64 max_block_count, int discard)
+int btrfs_prepare_device(int fd, const char *file, u64 *block_count_ret,
+               u64 max_block_count, unsigned opflags)
 {
        u64 block_count;
        struct stat st;
@@ -879,15 +1703,16 @@ int btrfs_prepare_device(int fd, const char *file, int zero_end,
        if (max_block_count)
                block_count = min(block_count, max_block_count);
 
-       if (discard) {
+       if (opflags & PREP_DEVICE_DISCARD) {
                /*
                 * We intentionally ignore errors from the discard ioctl.  It
                 * is not necessary for the mkfs functionality but just an
                 * optimization.
                 */
                if (discard_range(fd, 0, 0) == 0) {
-                       printf("Performing full device TRIM (%s) ...\n",
-                               pretty_size(block_count));
+                       if (opflags & PREP_DEVICE_VERBOSE)
+                               printf("Performing full device TRIM (%s) ...\n",
+                                               pretty_size(block_count));
                        discard_blocks(fd, 0, block_count);
                }
        }
@@ -896,7 +1721,7 @@ int btrfs_prepare_device(int fd, const char *file, int zero_end,
        for (i = 0 ; !ret && i < BTRFS_SUPER_MIRROR_MAX; i++)
                ret = zero_dev_clamped(fd, btrfs_sb_offset(i),
                                       BTRFS_SUPER_INFO_SIZE, block_count);
-       if (!ret && zero_end)
+       if (!ret && (opflags & PREP_DEVICE_ZERO_END))
                ret = zero_dev_clamped(fd, block_count - ZERO_DEV_BYTES,
                                       ZERO_DEV_BYTES, block_count);
 
@@ -1991,6 +2816,8 @@ path:
        if (fd < 0)
                goto err;
        ret = lookup_ino_rootid(fd, &id);
+       if (ret)
+               error("failed to lookup root id: %s", strerror(-ret));
        close(fd);
        if (ret < 0)
                goto err;
@@ -2673,10 +3500,8 @@ int lookup_ino_rootid(int fd, u64 *rootid)
        args.objectid = BTRFS_FIRST_FREE_OBJECTID;
 
        ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args);
-       if (ret < 0) {
-               error("failed to lookup root id: %s", strerror(errno));
-               return ret;
-       }
+       if (ret < 0)
+               return -errno;
 
        *rootid = args.treeid;
 
@@ -3106,6 +3931,24 @@ void clean_args_no_options(int argc, char *argv[], const char * const *usagestr)
        }
 }
 
+/*
+ * Same as clean_args_no_options but pass through arguments that could look
+ * like short options, e.g. resize, which takes a negative size argument like
+ * '-123M'. @usagestr is accepted but not used by this function.
+ *
+ * This accepts only two forms:
+ * - "-- option1 option2 ..."
+ * - "option1 option2 ..."
+ */
+void clean_args_no_options_relaxed(int argc, char *argv[], const char * const *usagestr)
+{
+       if (argc <= 1) /* no argv[1] to inspect */
+               return;
+
+       if (strcmp(argv[1], "--") == 0) /* explicit "--" separator comes first */
+               optind = 2; /* index of the argument right after "--" */
+}
+
 /* Subvolume helper functions */
 /*
  * test if name is a correct subvolume name